summaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/CodingGuidelines12
-rw-r--r--Documentation/Makefile24
-rw-r--r--Documentation/RelNotes/2.32.0.txt238
-rw-r--r--Documentation/RelNotes/2.33.0.txt32
-rw-r--r--Documentation/SubmittingPatches11
-rw-r--r--Documentation/config/advice.txt4
-rw-r--r--Documentation/config/checkout.txt21
-rw-r--r--Documentation/config/color.txt5
-rw-r--r--Documentation/config/index.txt5
-rw-r--r--Documentation/config/log.txt5
-rw-r--r--Documentation/config/pack.txt15
-rw-r--r--Documentation/config/push.txt7
-rw-r--r--Documentation/config/stash.txt6
-rw-r--r--Documentation/diff-generate-patch.txt7
-rw-r--r--Documentation/diff-options.txt20
-rw-r--r--Documentation/fetch-options.txt5
-rw-r--r--Documentation/git-am.txt4
-rw-r--r--Documentation/git-apply.txt11
-rw-r--r--Documentation/git-config.txt5
-rw-r--r--Documentation/git-credential.txt4
-rw-r--r--Documentation/git-cvsserver.txt24
-rw-r--r--Documentation/git-describe.txt14
-rw-r--r--Documentation/git-format-patch.txt10
-rw-r--r--Documentation/git-grep.txt64
-rw-r--r--Documentation/git-interpret-trailers.txt94
-rw-r--r--Documentation/git-mailinfo.txt21
-rw-r--r--Documentation/git-maintenance.txt6
-rw-r--r--Documentation/git-mktag.txt16
-rw-r--r--Documentation/git-p4.txt4
-rw-r--r--Documentation/git-rebase.txt20
-rw-r--r--Documentation/git-repack.txt2
-rw-r--r--Documentation/git-rm.txt4
-rw-r--r--Documentation/git-send-email.txt25
-rw-r--r--Documentation/git-sparse-checkout.txt14
-rw-r--r--Documentation/git-stash.txt6
-rw-r--r--Documentation/git-svn.txt38
-rw-r--r--Documentation/git.txt12
-rw-r--r--Documentation/gitattributes.txt8
-rw-r--r--Documentation/gitignore.txt4
-rw-r--r--Documentation/gitmailmap.txt7
-rw-r--r--Documentation/gitmodules.txt8
-rw-r--r--Documentation/gitnamespaces.txt4
-rw-r--r--Documentation/gitweb.conf.txt11
-rw-r--r--Documentation/glossary-content.txt4
-rwxr-xr-xDocumentation/lint-gitlink.perl108
-rwxr-xr-xDocumentation/lint-man-end-blurb.perl24
-rwxr-xr-xDocumentation/lint-man-section-order.perl105
-rw-r--r--Documentation/pretty-formats.txt6
-rw-r--r--Documentation/revisions.txt23
-rw-r--r--Documentation/technical/api-error-handling.txt10
-rw-r--r--Documentation/technical/api-trace2.txt2
-rw-r--r--Documentation/technical/index-format.txt19
-rw-r--r--Documentation/technical/packfile-uri.txt15
-rw-r--r--Documentation/technical/parallel-checkout.txt270
-rw-r--r--Documentation/technical/protocol-v2.txt39
-rw-r--r--Documentation/technical/reftable.txt9
-rw-r--r--Documentation/technical/remembering-renames.txt671
-rw-r--r--Documentation/technical/sparse-index.txt208
-rw-r--r--Documentation/user-manual.txt3
59 files changed, 2155 insertions, 218 deletions
diff --git a/Documentation/CodingGuidelines b/Documentation/CodingGuidelines
index 45465bc0c9..e3af089ecf 100644
--- a/Documentation/CodingGuidelines
+++ b/Documentation/CodingGuidelines
@@ -175,6 +175,11 @@ For shell scripts specifically (not exhaustive):
does not have such a problem.
+ - Even though "local" is not part of POSIX, we make heavy use of it
+ in our test suite. We do not use it in scripted Porcelains, and
+ hopefully nobody starts using "local" before they are reimplemented
+ in C ;-)
+
For C programs:
@@ -498,7 +503,12 @@ Error Messages
- Do not end error messages with a full stop.
- - Do not capitalize ("unable to open %s", not "Unable to open %s")
+ - Do not capitalize the first word, only because it is the first word
+ in the message ("unable to open %s", not "Unable to open %s"). But
+ "SHA-3 not supported" is fine, because the reason the first word is
+ capitalized is not because it is at the beginning of the sentence,
+ but because the word would be spelled in capital letters even when
+ it appeared in the middle of the sentence.
- Say what the error is first ("cannot open %s", not "%s: cannot open")
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 874a01d7a8..2aae4c9cbb 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -2,6 +2,8 @@
MAN1_TXT =
MAN5_TXT =
MAN7_TXT =
+HOWTO_TXT =
+DOC_DEP_TXT =
TECH_DOCS =
ARTICLES =
SP_ARTICLES =
@@ -42,6 +44,11 @@ MAN7_TXT += gittutorial-2.txt
MAN7_TXT += gittutorial.txt
MAN7_TXT += gitworkflows.txt
+HOWTO_TXT += $(wildcard howto/*.txt)
+
+DOC_DEP_TXT += $(wildcard *.txt)
+DOC_DEP_TXT += $(wildcard config/*.txt)
+
ifdef MAN_FILTER
MAN_TXT = $(filter $(MAN_FILTER),$(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT))
else
@@ -91,6 +98,7 @@ TECH_DOCS += technical/multi-pack-index
TECH_DOCS += technical/pack-format
TECH_DOCS += technical/pack-heuristics
TECH_DOCS += technical/pack-protocol
+TECH_DOCS += technical/parallel-checkout
TECH_DOCS += technical/partial-clone
TECH_DOCS += technical/protocol-capabilities
TECH_DOCS += technical/protocol-common
@@ -285,7 +293,7 @@ docdep_prereqs = \
mergetools-list.made $(mergetools_txt) \
cmd-list.made $(cmds_txt)
-doc.dep : $(docdep_prereqs) $(wildcard *.txt) $(wildcard config/*.txt) build-docdep.perl
+doc.dep : $(docdep_prereqs) $(DOC_DEP_TXT) build-docdep.perl
$(QUIET_GEN)$(RM) $@+ $@ && \
$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
mv $@+ $@
@@ -428,9 +436,9 @@ $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
mv $@+ $@
-howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
+howto-index.txt: howto-index.sh $(HOWTO_TXT)
$(QUIET_GEN)$(RM) $@+ $@ && \
- '$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(wildcard howto/*.txt)) >$@+ && \
+ '$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(HOWTO_TXT)) >$@+ && \
mv $@+ $@
$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
@@ -439,7 +447,7 @@ $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
WEBDOC_DEST = /pub/software/scm/git/docs
howto/%.html: ASCIIDOC_EXTRA += -a git-relative-html-prefix=../
-$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt GIT-ASCIIDOCFLAGS
+$(patsubst %.txt,%.html,$(HOWTO_TXT)): %.html : %.txt GIT-ASCIIDOCFLAGS
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
sed -e '1,/^$$/d' $< | \
$(TXT_TO_HTML) - >$@+ && \
@@ -471,7 +479,13 @@ print-man1:
@for i in $(MAN1_TXT); do echo $$i; done
lint-docs::
- $(QUIET_LINT)$(PERL_PATH) lint-gitlink.perl
+ $(QUIET_LINT)$(PERL_PATH) lint-gitlink.perl \
+ $(HOWTO_TXT) $(DOC_DEP_TXT) \
+ --section=1 $(MAN1_TXT) \
+ --section=5 $(MAN5_TXT) \
+ --section=7 $(MAN7_TXT); \
+ $(PERL_PATH) lint-man-end-blurb.perl $(MAN_TXT); \
+ $(PERL_PATH) lint-man-section-order.perl $(MAN_TXT);
ifeq ($(wildcard po/Makefile),po/Makefile)
doc-l10n install-l10n::
diff --git a/Documentation/RelNotes/2.32.0.txt b/Documentation/RelNotes/2.32.0.txt
index 5c329d5a1b..87d56fa1aa 100644
--- a/Documentation/RelNotes/2.32.0.txt
+++ b/Documentation/RelNotes/2.32.0.txt
@@ -7,6 +7,13 @@ Backward compatibility notes
* ".gitattributes", ".gitignore", and ".mailmap" files that are
symbolic links are ignored.
+ * "git apply --3way" used to first attempt a straight application,
+ and only fell back to the 3-way merge algorithm when the straight
+ application failed. Starting with this version, the command will
+ first try the 3-way merge algorithm and only when it fails (either
+ resulting with conflict or the base versions of blobs are missing),
+ falls back to the usual patch application.
+
Updates since v2.31
-------------------
@@ -40,11 +47,6 @@ UI, Workflows & Features
tweak both the message and the contents, and only the message,
respectively.
- * When accessing a server with a URL like https://user:pass@site/, we
- did not to fall back to the basic authentication with the
- credential material embedded in the URL after the "Negotiate"
- authentication failed. Now we do.
-
* "git send-email" learned to honor the core.hooksPath configuration.
* "git format-patch -v<n>" learned to allow a reroll count that is
@@ -57,6 +59,74 @@ UI, Workflows & Features
* "git clone --reject-shallow" option fails the clone as soon as we
notice that we are cloning from a shallow repository.
+ * A configuration variable has been added to force tips of certain
+ refs to be given a reachability bitmap.
+
+ * "gitweb" learned "e-mail privacy" feature to redact strings that
+ look like e-mail addresses on various pages.
+
+ * "git apply --3way" has always been "to fall back to 3-way merge
+ only when straight application fails". Swap the order of falling
+ back so that 3-way is always attempted first (only when the option
+ is given, of course) and then straight patch application is used as
+ a fallback when it fails.
+
+ * "git apply" now takes "--3way" and "--cached" at the same time, and
+ works and records results only in the index.
+
+ * The command line completion (in contrib/) has learned that
+ CHERRY_PICK_HEAD is a possible pseudo-ref.
+
+ * Userdiff patterns for "Scheme" have been added.
+
+ * "git log" learned "--diff-merges=<style>" option, with an
+ associated configuration variable log.diffMerges.
+
+ * "git log --format=..." placeholders learned %ah/%ch placeholders to
+ request the --date=human output.
+
+ * Replace GIT_CONFIG_NOSYSTEM mechanism to decline from reading the
+ system-wide configuration file with GIT_CONFIG_SYSTEM that lets
+ users specify from which file to read the system-wide configuration
+ (setting it to an empty file would essentially be the same as
+ setting NOSYSTEM), and introduce GIT_CONFIG_GLOBAL to override the
+ per-user configuration in $HOME/.gitconfig.
+
+ * "git add" and "git rm" learned not to touch those paths that are
+ outside of sparse checkout.
+
+ * "git rev-list" learns the "--filter=object:type=<type>" option,
+ which can be used to exclude objects of the given kind from the
+ packfile generated by pack-objects.
+
+ * The command line completion (in contrib/) for "git stash" has been
+ updated.
+
+ * "git subtree" updates.
+
+ * It is now documented that "format-patch" skips merges.
+
+ * Options to "git pack-objects" that take numeric values like
+ --window and --depth should not accept negative values; the input
+ validation has been tightened.
+
+ * The way the command line specified by the trailer.<token>.command
+ configuration variable receives the end-user supplied value was
+ both error prone and misleading. An alternative to achieve the
+ same goal in a safer and more intuitive way has been added, as
+ the trailer.<token>.cmd configuration variable, to replace it.
+
+ * "git add -i --dry-run" does not dry-run, which was surprising. The
+ combination of options has been taught to error out.
+
+ * "git push" learns to discover common ancestor with the receiving
+ end over protocol v2. This will hopefully make "git push" as
+ efficient as "git fetch" in avoiding objects from getting
+ transferred unnecessarily.
+
+ * "git mailinfo" (hence "git am") learned the "--quoted-cr" option to
+ control how lines ending with CRLF wrapped in base64 or qp are
+ handled.
Performance, Internal Implementation, Development Support etc.
@@ -98,6 +168,43 @@ Performance, Internal Implementation, Development Support etc.
* Generate [ec]tags under $(QUIET_GEN).
+ * Clean up codepaths that implement "git send-email --validate"
+ option and improve the message from it.
+
+ * The last remnant of gettext-poison has been removed.
+
+ * The test framework has been taught to optionally turn the default
+ merge strategy to "ort" throughout the system where we use
+ three-way merges internally, like cherry-pick, rebase etc.,
+ primarily to enhance its test coverage (the strategy has been
+ available as an explicit "-s ort" choice).
+
+ * A bit of code clean-up and a lot of test clean-up around userdiff
+ area.
+
+ * Handling of "promisor packs" that allows certain objects to be
+ missing and lazily retrievable has been optimized (a bit).
+
+ * When packet_write() fails, we gave an extra error message
+ unnecessarily, which has been corrected.
+
+ * The checkout machinery has been taught to perform the actual
+ write-out of the files in parallel when able.
+
+ * Show errno in the trace output in the error codepath that calls
+ read_raw_ref method.
+
+ * Effort to make the command line completion (in contrib/) safe with
+ "set -u" continues.
+
+ * Tweak a few tests for "log --format=..." that show timestamps in
+ various formats.
+
+ * The reflog expiry machinery has been taught to emit trace events.
+
+ * Over-the-wire protocol learns a new request type to ask for object
+ sizes given a list of object names.
+
Fixes since v2.31
-----------------
@@ -174,6 +281,115 @@ Fixes since v2.31
as directory separator.
(merge 9a7f1ce8b7 rs/daemon-sanitize-dir-sep later to maint).
+ * A NULL-dereference bug has been corrected in an error codepath in
+ "git for-each-ref", "git branch --list" etc.
+ (merge c685450880 jk/ref-filter-segfault-fix later to maint).
+
+ * Streamline the codepath to fix the UTF-8 encoding issues in the
+ argv[] and the prefix on macOS.
+ (merge c7d0e61016 tb/precompose-prefix-simplify later to maint).
+
+ * The command-line completion script (in contrib/) had a couple of
+ references that would have given a warning under the "-u" (nounset)
+ option.
+ (merge c5c0548d79 vs/completion-with-set-u later to maint).
+
+ * When "git pack-objects" makes a literal copy of a part of existing
+ packfile using the reachability bitmaps, its update to the progress
+ meter was broken.
+ (merge 8e118e8490 jk/pack-objects-bitmap-progress-fix later to maint).
+
+ * The dependencies for config-list.h and command-list.h were broken
+ when the former was split out of the latter, which has been
+ corrected.
+ (merge 56550ea718 sg/bugreport-fixes later to maint).
+
+ * "git push --quiet --set-upstream" was not quiet when setting the
+ upstream branch configuration, which has been corrected.
+ (merge f3cce896a8 ow/push-quiet-set-upstream later to maint).
+
+ * The prefetch task in "git maintenance" assumed that "git fetch"
+ from any remote would fetch all its local branches, which would
+ fetch too much if the user is interested in only a subset of
+ branches there.
+ (merge 32f67888d8 ds/maintenance-prefetch-fix later to maint).
+
+ * Clarify that pathnames recorded in Git trees are most often (but
+ not necessarily) encoded in UTF-8.
+ (merge 9364bf465d ab/pathname-encoding-doc later to maint).
+
+ * "git --config-env var=val cmd" wasn't accepted (only
+ --config-env=var=val was).
+ (merge c331551ccf ps/config-env-option-with-separate-value later to maint).
+
+ * When the reachability bitmap is in effect, the "do not lose
+ recently created objects and those that are reachable from them"
+ safety to protect us from races was disabled by mistake, which has
+ been corrected.
+ (merge 2ba582ba4c jk/prune-with-bitmap-fix later to maint).
+
+ * Cygwin pathname handling fix.
+ (merge bccc37fdc7 ad/cygwin-no-backslashes-in-paths later to maint).
+
+ * "git rebase --[no-]reschedule-failed-exec" did not work well with
+ its configuration variable, which has been corrected.
+ (merge e5b32bffd1 ab/rebase-no-reschedule-failed-exec later to maint).
+
+ * Portability fix for command line completion script (in contrib/).
+ (merge f2acf763e2 si/zsh-complete-comment-fix later to maint).
+
+ * "git repack -A -d" in a partial clone unnecessarily loosened
+ objects in promisor pack.
+
+ * "git bisect skip" when custom words are used for new/old did not
+ work, which has been corrected.
+
+ * A few variants of informational message "Already up-to-date" have
+ been rephrased.
+ (merge ad9322da03 js/merge-already-up-to-date-message-reword later to maint).
+
+ * "git submodule update --quiet" did not propagate the quiet option
+ down to underlying "git fetch", which has been corrected.
+ (merge 62af4bdd42 nc/submodule-update-quiet later to maint).
+
+ * Document that our test can use "local" keyword.
+ (merge a84fd3bcc6 jc/test-allows-local later to maint).
+
+ * The word-diff mode has been taught to work better with a word
+ regexp that can match an empty string.
+ (merge 0324e8fc6b pw/word-diff-zero-width-matches later to maint).
+
+ * "git p4" learned to find branch points more efficiently.
+ (merge 6b79818bfb jk/p4-locate-branch-point-optim later to maint).
+
+ * When "git update-ref -d" removes a ref that is packed, it left
+ empty directories under $GIT_DIR/refs/, which has been corrected.
+ (merge 5f03e5126d wc/packed-ref-removal-cleanup later to maint).
+
+ * "git clean" and "git ls-files -i" had confusion around working on
+ or showing ignored paths inside an ignored directory, which has
+ been corrected.
+ (merge b548f0f156 en/dir-traversal later to maint).
+
+ * The handling of "%(push)" formatting element of "for-each-ref" and
+ friends was broken when the same codepath started handling
+ "%(push:<what>)", which has been corrected.
+ (merge 1e1c4c5eac zh/ref-filter-push-remote-fix later to maint).
+
+ * The bash prompt script (in contrib/) did not work under "set -u".
+ (merge 5c0cbdb107 en/prompt-under-set-u later to maint).
+
+ * The "chainlint" feature in the test framework is a handy way to
+ catch common mistakes in writing new tests, but tends to get
+ expensive. A knob to selectively disable it has been introduced
+ to help running tests that the developer has not modified.
+ (merge 2d86a96220 jk/test-chainlint-softer later to maint).
+
+ * The "rev-parse" command did not diagnose the lack of argument to
+ "--path-format" option, which was introduced in v2.31 era, which
+ has been corrected.
+ (merge 99fc555188 wm/rev-parse-path-format-wo-arg later to maint).
+
* Other code cleanup, docfix, build fix, etc.
(merge f451960708 dl/cat-file-doc-cleanup later to maint).
(merge 12604a8d0c sv/t9801-test-path-is-file-cleanup later to maint).
@@ -186,3 +402,15 @@ Fixes since v2.31
(merge 2be927f3d1 ab/diff-no-index-tests later to maint).
(merge 76593c09bb ab/detox-gettext-tests later to maint).
(merge 28e29ee38b jc/doc-format-patch-clarify later to maint).
+ (merge fc12b6fdde fm/user-manual-use-preface later to maint).
+ (merge dba94e3a85 cc/test-helper-bloom-usage-fix later to maint).
+ (merge 61a7660516 hn/reftable-tables-doc-update later to maint).
+ (merge 81ed96a9b2 jt/fetch-pack-request-fix later to maint).
+ (merge 151b6c2dd7 jc/doc-do-not-capitalize-clarification later to maint).
+ (merge 9160068ac6 js/access-nul-emulation-on-windows later to maint).
+ (merge 7a14acdbe6 po/diff-patch-doc later to maint).
+ (merge f91371b948 pw/patience-diff-clean-up later to maint).
+ (merge 3a7f0908b6 mt/clean-clean later to maint).
+ (merge d4e2d15a8b ab/streaming-simplify later to maint).
+ (merge 0e59f7ad67 ah/merge-ort-i18n later to maint).
+ (merge e6f68f62e0 ls/typofix later to maint).
diff --git a/Documentation/RelNotes/2.33.0.txt b/Documentation/RelNotes/2.33.0.txt
new file mode 100644
index 0000000000..6795a2734f
--- /dev/null
+++ b/Documentation/RelNotes/2.33.0.txt
@@ -0,0 +1,32 @@
+Git 2.33 Release Notes
+======================
+
+Updates since Git 2.32
+----------------------
+
+UI, Workflows & Features
+
+Performance, Internal Implementation, Development Support etc.
+
+
+Fixes since v2.32
+-----------------
+
+ * We historically rejected a very short string as an author name
+ while accepting a patch e-mail, which has been loosened.
+ (merge 72ee47ceeb ef/mailinfo-short-name later to maint).
+
+ * The parallel checkout codepath did not initialize object ID field
+ used to talk to the worker processes in a futureproof way.
+
+ * Rewrite code that triggers undefined behaviour warning.
+ (merge aafa5df0df jn/size-t-casted-to-off-t-fix later to maint).
+
+ * The description of "fast-forward" in the glossary has been updated.
+ (merge e22f2daed0 ry/clarify-fast-forward-in-glossary later to maint).
+
+ * Other code cleanup, docfix, build fix, etc.
+ (merge bfe35a6165 ah/doc-describe later to maint).
+ (merge f302c1e4aa jc/clarify-revision-range later to maint).
+ (merge 3127ff90ea tl/fix-packfile-uri-doc later to maint).
+ (merge a84216c684 jk/doc-color-pager later to maint).
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 0452db2e67..55287d72e0 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -117,10 +117,13 @@ If in doubt which identifier to use, run `git log --no-merges` on the
files you are modifying to see the current conventions.
[[summary-section]]
-It's customary to start the remainder of the first line after "area: "
-with a lower-case letter. E.g. "doc: clarify...", not "doc:
-Clarify...", or "githooks.txt: improve...", not "githooks.txt:
-Improve...".
+The title sentence after the "area:" prefix omits the full stop at the
+end, and its first word is not capitalized unless there is a reason to
+capitalize it other than because it is the first word in the sentence.
+E.g. "doc: clarify...", not "doc: Clarify...", or "githooks.txt:
+improve...", not "githooks.txt: Improve...". But "refs: HEAD is also
+treated as a ref" is correct, as we spell `HEAD` in all caps even when
+it appears in the middle of a sentence.
[[meaningful-message]]
The body should provide a meaningful commit message, which:
diff --git a/Documentation/config/advice.txt b/Documentation/config/advice.txt
index acbd0c09aa..8b2849ff7b 100644
--- a/Documentation/config/advice.txt
+++ b/Documentation/config/advice.txt
@@ -119,4 +119,8 @@ advice.*::
addEmptyPathspec::
Advice shown if a user runs the add command without providing
the pathspec parameter.
+ updateSparsePath::
+ Advice shown when either linkgit:git-add[1] or linkgit:git-rm[1]
+ is asked to update index entries outside the current sparse
+ checkout.
--
diff --git a/Documentation/config/checkout.txt b/Documentation/config/checkout.txt
index 2cddf7b4b4..bfbca90f0e 100644
--- a/Documentation/config/checkout.txt
+++ b/Documentation/config/checkout.txt
@@ -21,3 +21,24 @@ checkout.guess::
Provides the default value for the `--guess` or `--no-guess`
option in `git checkout` and `git switch`. See
linkgit:git-switch[1] and linkgit:git-checkout[1].
+
+checkout.workers::
+ The number of parallel workers to use when updating the working tree.
+ The default is one, i.e. sequential execution. If set to a value less
+ than one, Git will use as many workers as the number of logical cores
+ available. This setting and `checkout.thresholdForParallelism` affect
+ all commands that perform checkout. E.g. checkout, clone, reset,
+ sparse-checkout, etc.
++
+Note: parallel checkout usually delivers better performance for repositories
+located on SSDs or over NFS. For repositories on spinning disks and/or machines
+with a small number of cores, the default sequential checkout often performs
+better. The size and compression level of a repository might also influence how
+well the parallel version performs.
+
+checkout.thresholdForParallelism::
+ When running parallel checkout with a small number of files, the cost
+ of subprocess spawning and inter-process communication might outweigh
+ the parallelization gains. This setting allows you to define the minimum
+ number of files for which parallel checkout should be attempted. The
+ default is 100.
diff --git a/Documentation/config/color.txt b/Documentation/config/color.txt
index d5daacb13a..e05d520a86 100644
--- a/Documentation/config/color.txt
+++ b/Documentation/config/color.txt
@@ -127,8 +127,9 @@ color.interactive.<slot>::
interactive commands.
color.pager::
- A boolean to enable/disable colored output when the pager is in
- use (default is true).
+ A boolean to specify whether `auto` color modes should colorize
+ output going to the pager. Defaults to true; set this to false
+ if your pager does not understand ANSI color codes.
color.push::
A boolean to enable/disable color in push errors. May be set to
diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt
index 7cb50b37e9..75f3a2d105 100644
--- a/Documentation/config/index.txt
+++ b/Documentation/config/index.txt
@@ -14,6 +14,11 @@ index.recordOffsetTable::
Defaults to 'true' if index.threads has been explicitly enabled,
'false' otherwise.
+index.sparse::
+ When enabled, write the index using sparse-directory entries. This
+ has no effect unless `core.sparseCheckout` and
+ `core.sparseCheckoutCone` are both enabled. Defaults to 'false'.
+
index.threads::
Specifies the number of threads to spawn when loading the index.
This is meant to reduce index load time on multiprocessor machines.
diff --git a/Documentation/config/log.txt b/Documentation/config/log.txt
index 208d5fdcaa..456eb07800 100644
--- a/Documentation/config/log.txt
+++ b/Documentation/config/log.txt
@@ -24,6 +24,11 @@ log.excludeDecoration::
the config option can be overridden by the `--decorate-refs`
option.
+log.diffMerges::
+ Set default diff format to be used for merge commits. See
+ `--diff-merges` in linkgit:git-log[1] for details.
+ Defaults to `separate`.
+
log.follow::
If `true`, `git log` will act as if the `--follow` option was used when
a single <path> is given. This has the same limitations as `--follow`,
diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt
index 3da4ea98e2..c0844d8d8e 100644
--- a/Documentation/config/pack.txt
+++ b/Documentation/config/pack.txt
@@ -122,6 +122,21 @@ pack.useSparse::
commits contain certain types of direct renames. Default is
`true`.
+pack.preferBitmapTips::
+ When selecting which commits will receive bitmaps, prefer a
+ commit at the tip of any reference that is a suffix of any value
+ of this configuration over any other commits in the "selection
+ window".
++
+Note that setting this configuration to `refs/foo` does not mean that
+the commits at the tips of `refs/foo/bar` and `refs/foo/baz` will
+necessarily be selected. This is because commits are selected for
+bitmaps from within a series of windows of variable length.
++
+If a commit at the tip of any reference which is a suffix of any value
+of this configuration is seen in a window, it is immediately given
+preference over any other commit in that window.
+
pack.writeBitmaps (deprecated)::
This is a deprecated synonym for `repack.writeBitmaps`.
diff --git a/Documentation/config/push.txt b/Documentation/config/push.txt
index 21b256e0a4..f2667b2689 100644
--- a/Documentation/config/push.txt
+++ b/Documentation/config/push.txt
@@ -120,3 +120,10 @@ push.useForceIfIncludes::
`--force-if-includes` as an option to linkgit:git-push[1]
in the command line. Adding `--no-force-if-includes` at the
time of push overrides this configuration setting.
+
+push.negotiate::
+ If set to "true", attempt to reduce the size of the packfile
+ sent by rounds of negotiation in which the client and the
+ server attempt to find commits in common. If "false", Git will
+ rely solely on the server's ref advertisement to find commits
+ in common.
diff --git a/Documentation/config/stash.txt b/Documentation/config/stash.txt
index 413f907cba..9ed775281f 100644
--- a/Documentation/config/stash.txt
+++ b/Documentation/config/stash.txt
@@ -6,9 +6,9 @@ stash.useBuiltin::
remaining users that setting this now does nothing.
stash.showIncludeUntracked::
- If this is set to true, the `git stash show` command without an
- option will show the untracked files of a stash entry. Defaults to
- false. See description of 'show' command in linkgit:git-stash[1].
+ If this is set to true, the `git stash show` command will show
+ the untracked files of a stash entry. Defaults to false. See
+ description of 'show' command in linkgit:git-stash[1].
stash.showPatch::
If this is set to true, the `git stash show` command without an
diff --git a/Documentation/diff-generate-patch.txt b/Documentation/diff-generate-patch.txt
index 2db8eacc3e..c78063d4f7 100644
--- a/Documentation/diff-generate-patch.txt
+++ b/Documentation/diff-generate-patch.txt
@@ -11,7 +11,7 @@ linkgit:git-diff-files[1]
with the `-p` option produces patch text.
You can customize the creation of patch text via the
`GIT_EXTERNAL_DIFF` and the `GIT_DIFF_OPTS` environment variables
-(see linkgit:git[1]).
+(see linkgit:git[1]), and the `diff` attribute (see linkgit:gitattributes[5]).
What the -p option produces is slightly different from the traditional
diff format:
@@ -74,6 +74,11 @@ separate lines indicate the old and the new mode.
rename from b
rename to a
+5. Hunk headers mention the name of the function to which the hunk
+ applies. See "Defining a custom hunk-header" in
+ linkgit:gitattributes[5] for details of how to tailor this to
+ specific languages.
+
Combined diff format
--------------------
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index aa2b5c11f2..32e6dee5ac 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -34,7 +34,7 @@ endif::git-diff[]
endif::git-format-patch[]
ifdef::git-log[]
---diff-merges=(off|none|first-parent|1|separate|m|combined|c|dense-combined|cc)::
+--diff-merges=(off|none|on|first-parent|1|separate|m|combined|c|dense-combined|cc)::
--no-diff-merges::
Specify diff format to be used for merge commits. Default is
{diff-merges-default} unless `--first-parent` is in use, in which case
@@ -45,17 +45,24 @@ ifdef::git-log[]
Disable output of diffs for merge commits. Useful to override
implied value.
+
+--diff-merges=on:::
+--diff-merges=m:::
+-m:::
+ This option makes diff output for merge commits to be shown in
+ the default format. The default format could be changed using
+ `log.diffMerges` configuration parameter, whose default value
+ is `separate`. `-m` implies `-p`.
++
--diff-merges=first-parent:::
--diff-merges=1:::
This option makes merge commits show the full diff with
respect to the first parent only.
+
--diff-merges=separate:::
---diff-merges=m:::
--m:::
This makes merge commits show the full diff with respect to
each of the parents. Separate log entry and diff is generated
- for each parent. `-m` doesn't produce any output without `-p`.
+ for each parent. This is the format that `-m` produced
+ historically.
+
--diff-merges=combined:::
--diff-merges=c:::
@@ -293,11 +300,14 @@ explained for the configuration variable `core.quotePath` (see
linkgit:git-config[1]).
--name-only::
- Show only names of changed files.
+ Show only names of changed files. The file names are often encoded in UTF-8.
+ For more information see the discussion about encoding in the linkgit:git-log[1]
+ manual page.
--name-status::
Show only names and status of changed files. See the description
of the `--diff-filter` option on what the status letters mean.
+ Just like `--name-only` the file names are often encoded in UTF-8.
--submodule[=<format>]::
Specify how differences in submodules are shown. When specifying
diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index 07783deee3..9e7b4e189c 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -110,6 +110,11 @@ ifndef::git-pull[]
setting `fetch.writeCommitGraph`.
endif::git-pull[]
+--prefetch::
+ Modify the configured refspec to place all refs into the
+ `refs/prefetch/` namespace. See the `prefetch` task in
+ linkgit:git-maintenance[1].
+
-p::
--prune::
Before fetching, remove any remote-tracking references that no
diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt
index decd8ae122..8714dfcb76 100644
--- a/Documentation/git-am.txt
+++ b/Documentation/git-am.txt
@@ -15,6 +15,7 @@ SYNOPSIS
[--whitespace=<option>] [-C<n>] [-p<n>] [--directory=<dir>]
[--exclude=<path>] [--include=<path>] [--reject] [-q | --quiet]
[--[no-]scissors] [-S[<keyid>]] [--patch-format=<format>]
+ [--quoted-cr=<action>]
[(<mbox> | <Maildir>)...]
'git am' (--continue | --skip | --abort | --quit | --show-current-patch[=(diff|raw)])
@@ -59,6 +60,9 @@ OPTIONS
--no-scissors::
Ignore scissors lines (see linkgit:git-mailinfo[1]).
+--quoted-cr=<action>::
+ This flag will be passed down to 'git mailinfo' (see linkgit:git-mailinfo[1]).
+
-m::
--message-id::
Pass the `-m` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]),
diff --git a/Documentation/git-apply.txt b/Documentation/git-apply.txt
index 91d9a8601c..aa1ae56a25 100644
--- a/Documentation/git-apply.txt
+++ b/Documentation/git-apply.txt
@@ -84,12 +84,13 @@ OPTIONS
-3::
--3way::
- When the patch does not apply cleanly, fall back on 3-way merge if
- the patch records the identity of blobs it is supposed to apply to,
- and we have those blobs available locally, possibly leaving the
+ Attempt 3-way merge if the patch records the identity of blobs it is supposed
+ to apply to and we have those blobs available locally, possibly leaving the
conflict markers in the files in the working tree for the user to
- resolve. This option implies the `--index` option, and is incompatible
- with the `--reject` and the `--cached` options.
+ resolve. This option implies the `--index` option unless the
+ `--cached` option is used, and is incompatible with the `--reject` option.
+ When used with the `--cached` option, any conflicts are left at higher stages
+ in the cache.
--build-fake-ancestor=<file>::
Newer 'git diff' output has embedded 'index information'
diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt
index 4b4cc5c5e8..5cddadafd2 100644
--- a/Documentation/git-config.txt
+++ b/Documentation/git-config.txt
@@ -340,6 +340,11 @@ GIT_CONFIG::
Using the "--global" option forces this to ~/.gitconfig. Using the
"--system" option forces this to $(prefix)/etc/gitconfig.
+GIT_CONFIG_GLOBAL::
+GIT_CONFIG_SYSTEM::
+ Take the configuration from the given files instead of from global or
+ system-level configuration. See linkgit:git[1] for details.
+
GIT_CONFIG_NOSYSTEM::
Whether to skip reading settings from the system-wide
$(prefix)/etc/gitconfig file. See linkgit:git[1] for details.
diff --git a/Documentation/git-credential.txt b/Documentation/git-credential.txt
index 31c81c4c02..206e3c5f40 100644
--- a/Documentation/git-credential.txt
+++ b/Documentation/git-credential.txt
@@ -159,3 +159,7 @@ empty string.
+
Components which are missing from the URL (e.g., there is no
username in the example above) will be left unset.
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Documentation/git-cvsserver.txt b/Documentation/git-cvsserver.txt
index 1b1c71ad9d..f2e4a47ebe 100644
--- a/Documentation/git-cvsserver.txt
+++ b/Documentation/git-cvsserver.txt
@@ -24,6 +24,18 @@ Usage:
[verse]
'git-cvsserver' [<options>] [pserver|server] [<directory> ...]
+DESCRIPTION
+-----------
+
+This application is a CVS emulation layer for Git.
+
+It is highly functional. However, not all methods are implemented,
+and for those methods that are implemented,
+not all switches are implemented.
+
+Testing has been done using both the CLI CVS client, and the Eclipse CVS
+plugin. Most functionality works fine with both of these clients.
+
OPTIONS
-------
@@ -57,18 +69,6 @@ access still needs to be enabled by the `gitcvs.enabled` config option
unless `--export-all` was given, too.
-DESCRIPTION
------------
-
-This application is a CVS emulation layer for Git.
-
-It is highly functional. However, not all methods are implemented,
-and for those methods that are implemented,
-not all switches are implemented.
-
-Testing has been done using both the CLI CVS client, and the Eclipse CVS
-plugin. Most functionality works fine with both of these clients.
-
LIMITATIONS
-----------
diff --git a/Documentation/git-describe.txt b/Documentation/git-describe.txt
index a88f6ae2c6..c6a79c2a0f 100644
--- a/Documentation/git-describe.txt
+++ b/Documentation/git-describe.txt
@@ -63,9 +63,10 @@ OPTIONS
Automatically implies --tags.
--abbrev=<n>::
- Instead of using the default 7 hexadecimal digits as the
- abbreviated object name, use <n> digits, or as many digits
- as needed to form a unique object name. An <n> of 0
+ Instead of using the default number of hexadecimal digits (which
+ will vary according to the number of objects in the repository with
+ a default of 7) of the abbreviated object name, use <n> digits, or
+ as many digits as needed to form a unique object name. An <n> of 0
will suppress long format, only showing the closest tag.
--candidates=<n>::
@@ -139,8 +140,11 @@ at the end.
The number of additional commits is the number
of commits which would be displayed by "git log v1.0.4..parent".
-The hash suffix is "-g" + unambiguous abbreviation for the tip commit
-of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`).
+The hash suffix is "-g" + an unambiguous abbreviation for the tip commit
+of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`). The
+length of the abbreviation scales as the repository grows, using the
+approximate number of objects in the repository and a bit of math
+around the birthday paradox, and defaults to a minimum of 7.
The "g" prefix stands for "git" and is used to allow describing the version of
a software depending on the SCM the software is managed with. This is useful
in an environment where people may use different SCMs.
diff --git a/Documentation/git-format-patch.txt b/Documentation/git-format-patch.txt
index 911da181a1..fe2f69d36e 100644
--- a/Documentation/git-format-patch.txt
+++ b/Documentation/git-format-patch.txt
@@ -36,7 +36,7 @@ SYNOPSIS
DESCRIPTION
-----------
-Prepare each commit with its "patch" in
+Prepare each non-merge commit with its "patch" in
one "message" per commit, formatted to resemble a UNIX mailbox.
The output of this command is convenient for e-mail submission or
for use with 'git am'.
@@ -740,6 +740,14 @@ use it only when you know the recipient uses Git to apply your patch.
$ git format-patch -3
------------
+CAVEATS
+-------
+
+Note that `format-patch` will omit merge commits from the output, even
+if they are part of the requested range. A simple "patch" does not
+include enough information for the receiving end to reproduce the same
+merge commit.
+
SEE ALSO
--------
linkgit:git-am[1], linkgit:git-send-email[1]
diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 4e0ba8234a..3d393fbac1 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -38,38 +38,6 @@ are lists of one or more search expressions separated by newline
characters. An empty string as search expression matches all lines.
-CONFIGURATION
--------------
-
-grep.lineNumber::
- If set to true, enable `-n` option by default.
-
-grep.column::
- If set to true, enable the `--column` option by default.
-
-grep.patternType::
- Set the default matching behavior. Using a value of 'basic', 'extended',
- 'fixed', or 'perl' will enable the `--basic-regexp`, `--extended-regexp`,
- `--fixed-strings`, or `--perl-regexp` option accordingly, while the
- value 'default' will return to the default matching behavior.
-
-grep.extendedRegexp::
- If set to true, enable `--extended-regexp` option by default. This
- option is ignored when the `grep.patternType` option is set to a value
- other than 'default'.
-
-grep.threads::
- Number of grep worker threads to use. If unset (or set to 0), Git will
- use as many threads as the number of logical cores available.
-
-grep.fullName::
- If set to true, enable `--full-name` option by default.
-
-grep.fallbackToNoIndex::
- If set to true, fall back to git grep --no-index if git grep
- is executed outside of a git repository. Defaults to false.
-
-
OPTIONS
-------
--cached::
@@ -363,6 +331,38 @@ with multiple threads might perform slower than single threaded if `--textconv`
is given and there're too many text conversions. So if you experience low
performance in this case, it might be desirable to use `--threads=1`.
+CONFIGURATION
+-------------
+
+grep.lineNumber::
+ If set to true, enable `-n` option by default.
+
+grep.column::
+ If set to true, enable the `--column` option by default.
+
+grep.patternType::
+ Set the default matching behavior. Using a value of 'basic', 'extended',
+ 'fixed', or 'perl' will enable the `--basic-regexp`, `--extended-regexp`,
+ `--fixed-strings`, or `--perl-regexp` option accordingly, while the
+ value 'default' will return to the default matching behavior.
+
+grep.extendedRegexp::
+ If set to true, enable `--extended-regexp` option by default. This
+ option is ignored when the `grep.patternType` option is set to a value
+ other than 'default'.
+
+grep.threads::
+ Number of grep worker threads to use. If unset (or set to 0), Git will
+ use as many threads as the number of logical cores available.
+
+grep.fullName::
+ If set to true, enable `--full-name` option by default.
+
+grep.fallbackToNoIndex::
+ If set to true, fall back to git grep --no-index if git grep
+ is executed outside of a git repository. Defaults to false.
+
+
GIT
---
Part of the linkgit:git[1] suite
diff --git a/Documentation/git-interpret-trailers.txt b/Documentation/git-interpret-trailers.txt
index 96ec6499f0..956a01d184 100644
--- a/Documentation/git-interpret-trailers.txt
+++ b/Documentation/git-interpret-trailers.txt
@@ -232,25 +232,38 @@ trailer.<token>.ifmissing::
that option for trailers with the specified <token>.
trailer.<token>.command::
- This option can be used to specify a shell command that will
- be called to automatically add or modify a trailer with the
- specified <token>.
+ This option behaves in the same way as 'trailer.<token>.cmd', except
+ that it doesn't pass anything as argument to the specified command.
+ Instead the first occurrence of substring $ARG is replaced by the
+ value that would be passed as argument.
+
-When this option is specified, the behavior is as if a special
-'<token>=<value>' argument were added at the beginning of the command
-line, where <value> is taken to be the standard output of the
-specified command with any leading and trailing whitespace trimmed
-off.
+The 'trailer.<token>.command' option has been deprecated in favor of
+'trailer.<token>.cmd' due to the fact that $ARG in the user's command is
+only replaced once and that the original way of replacing $ARG is not safe.
+
-If the command contains the `$ARG` string, this string will be
-replaced with the <value> part of an existing trailer with the same
-<token>, if any, before the command is launched.
+When both 'trailer.<token>.cmd' and 'trailer.<token>.command' are given
+for the same <token>, 'trailer.<token>.cmd' is used and
+'trailer.<token>.command' is ignored.
+
+trailer.<token>.cmd::
+ This option can be used to specify a shell command that will be called
+ once to automatically add a trailer with the specified <token>, and then
+ called each time a '--trailer <token>=<value>' argument is specified, to
+ modify the <value> of the trailer that this option would produce.
+
-If some '<token>=<value>' arguments are also passed on the command
-line, when a 'trailer.<token>.command' is configured, the command will
-also be executed for each of these arguments. And the <value> part of
-these arguments, if any, will be used to replace the `$ARG` string in
-the command.
+When the specified command is first called to add a trailer
+with the specified <token>, the behavior is as if a special
+'--trailer <token>=<value>' argument was added at the beginning
+of the "git interpret-trailers" command, where <value>
+is taken to be the standard output of the command with any
+leading and trailing whitespace trimmed off.
++
+If some '--trailer <token>=<value>' arguments are also passed
+on the command line, the command is called again once for each
+of these arguments with the same <token>. And the <value> part
+of these arguments, if any, will be passed to the command as its
+first argument. This way the command can produce a <value> computed
+from the <value> passed in the '--trailer <token>=<value>' argument.
EXAMPLES
--------
@@ -333,6 +346,55 @@ subject
Fix #42
------------
+* Configure a 'help' trailer with a cmd that uses a script `glog-find-author`
+ which searches for the specified author identity in the git log of the
+ repository, and show how it works:
++
+------------
+$ cat ~/bin/glog-find-author
+#!/bin/sh
+test -n "$1" && git log --author="$1" --pretty="%an <%ae>" -1 || true
+$ git config trailer.help.key "Helped-by: "
+$ git config trailer.help.ifExists "addIfDifferentNeighbor"
+$ git config trailer.help.cmd "~/bin/glog-find-author"
+$ git interpret-trailers --trailer="help:Junio" --trailer="help:Couder" <<EOF
+> subject
+>
+> message
+>
+> EOF
+subject
+
+message
+
+Helped-by: Junio C Hamano <gitster@pobox.com>
+Helped-by: Christian Couder <christian.couder@gmail.com>
+------------
+
+* Configure a 'ref' trailer with a cmd that uses a script `glog-grep`
+ to grep the last relevant commit from git log in the git repository,
+ and show how it works:
++
+------------
+$ cat ~/bin/glog-grep
+#!/bin/sh
+test -n "$1" && git log --grep "$1" --pretty=reference -1 || true
+$ git config trailer.ref.key "Reference-to: "
+$ git config trailer.ref.ifExists "replace"
+$ git config trailer.ref.cmd "~/bin/glog-grep"
+$ git interpret-trailers --trailer="ref:Add copyright notices." <<EOF
+> subject
+>
+> message
+>
+> EOF
+subject
+
+message
+
+Reference-to: 8bc9a0c769 (Add copyright notices., 2005-04-07)
+------------
+
* Configure a 'see' trailer with a command to show the subject of a
commit that is related, and show how it works:
+
diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt
index d343f040f5..3fcfd965fd 100644
--- a/Documentation/git-mailinfo.txt
+++ b/Documentation/git-mailinfo.txt
@@ -9,7 +9,9 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message
SYNOPSIS
--------
[verse]
-'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch>
+'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n]
+ [--[no-]scissors] [--quoted-cr=<action>]
+ <msg> <patch>
DESCRIPTION
@@ -89,6 +91,23 @@ This can be enabled by default with the configuration option mailinfo.scissors.
--no-scissors::
Ignore scissors lines. Useful for overriding mailinfo.scissors settings.
+--quoted-cr=<action>::
+ Action to take when processing email messages sent with base64 or
+ quoted-printable encoding, and the decoded lines end with a CRLF
+ instead of a simple LF.
++
+The valid actions are:
++
+--
+* `nowarn`: Git will do nothing when such a CRLF is found.
+* `warn`: Git will issue a warning for each message if such a CRLF is
+ found.
+* `strip`: Git will convert those CRLF to LF.
+--
++
+The default action can be set by the configuration option `mailinfo.quotedCR`.
+If no such configuration option has been set, `warn` will be used.
+
<msg>::
The commit log message extracted from e-mail, usually
except the title line which comes from e-mail Subject.
diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt
index 80ddd33ceb..1e738ad398 100644
--- a/Documentation/git-maintenance.txt
+++ b/Documentation/git-maintenance.txt
@@ -92,10 +92,8 @@ commit-graph::
prefetch::
The `prefetch` task updates the object directory with the latest
objects from all registered remotes. For each remote, a `git fetch`
- command is run. The refmap is custom to avoid updating local or remote
- branches (those in `refs/heads` or `refs/remotes`). Instead, the
- remote refs are stored in `refs/prefetch/<remote>/`. Also, tags are
- not updated.
+ command is run. The configured refspec is modified to place all
+ requested refs within `refs/prefetch/`. Also, tags are not updated.
+
This is done to avoid disrupting the remote-tracking branches. The end users
expect these refs to stay unmoved unless they initiate a fetch. With prefetch
diff --git a/Documentation/git-mktag.txt b/Documentation/git-mktag.txt
index 17a2603a60..466a697519 100644
--- a/Documentation/git-mktag.txt
+++ b/Documentation/git-mktag.txt
@@ -11,14 +11,6 @@ SYNOPSIS
[verse]
'git mktag'
-OPTIONS
--------
-
---strict::
- By default mktag turns on the equivalent of
- linkgit:git-fsck[1] `--strict` mode. Use `--no-strict` to
- disable it.
-
DESCRIPTION
-----------
@@ -45,6 +37,14 @@ the appropriate `fsck.<msg-id>` varible:
git -c fsck.extraHeaderEntry=ignore mktag <my-tag-with-headers
+OPTIONS
+-------
+
+--strict::
+ By default mktag turns on the equivalent of
+ linkgit:git-fsck[1] `--strict` mode. Use `--no-strict` to
+ disable it.
+
Tag Format
----------
A tag signature file, to be fed to this command's standard input,
diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt
index f89e68b424..38e5257b2a 100644
--- a/Documentation/git-p4.txt
+++ b/Documentation/git-p4.txt
@@ -762,3 +762,7 @@ IMPLEMENTATION DETAILS
message indicating the p4 depot location and change number. This
line is used by later 'git p4 sync' operations to know which p4
changes are new.
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt
index f08ae27e2a..55af6fd24e 100644
--- a/Documentation/git-rebase.txt
+++ b/Documentation/git-rebase.txt
@@ -200,12 +200,6 @@ Alternatively, you can undo the 'git rebase' with
git rebase --abort
-CONFIGURATION
--------------
-
-include::config/rebase.txt[]
-include::config/sequencer.txt[]
-
OPTIONS
-------
--onto <newbase>::
@@ -623,6 +617,14 @@ See also INCOMPATIBLE OPTIONS below.
--no-reschedule-failed-exec::
Automatically reschedule `exec` commands that failed. This only makes
sense in interactive mode (or when an `--exec` option was provided).
++
+Even though this option applies once a rebase is started, it's set for
+the whole rebase at the start based on either the
+`rebase.rescheduleFailedExec` configuration (see linkgit:git-config[1]
+or "CONFIGURATION" below) or whether this option is
+provided. Otherwise an explicit `--no-reschedule-failed-exec` at the
+start would be overridden by the presence of
+`rebase.rescheduleFailedExec=true` configuration.
INCOMPATIBLE OPTIONS
--------------------
@@ -1266,6 +1268,12 @@ merge tlsv1.3
merge cmake
------------
+CONFIGURATION
+-------------
+
+include::config/rebase.txt[]
+include::config/sequencer.txt[]
+
BUGS
----
The todo list presented by the deprecated `--preserve-merges --interactive`
diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index 317d63cf0d..ef310f362e 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -186,7 +186,7 @@ When `--unpacked` is specified, loose objects are implicitly included in
this "roll-up", without respect to their reachability. This is subject
to change in the future. This option (implying a drastically different
repack mode) is not guaranteed to work with all other combinations of
-option to `git repack`).
+options to `git repack`.
CONFIGURATION
-------------
diff --git a/Documentation/git-rm.txt b/Documentation/git-rm.txt
index ab750367fd..26e9b28470 100644
--- a/Documentation/git-rm.txt
+++ b/Documentation/git-rm.txt
@@ -23,7 +23,9 @@ branch, and no updates to their contents can be staged in the index,
though that default behavior can be overridden with the `-f` option.
When `--cached` is given, the staged content has to
match either the tip of the branch or the file on disk,
-allowing the file to be removed from just the index.
+allowing the file to be removed from just the index. When
+sparse-checkouts are in use (see linkgit:git-sparse-checkout[1]),
+`git rm` will only remove paths within the sparse-checkout patterns.
OPTIONS
diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt
index 93708aefea..3db4eab4ba 100644
--- a/Documentation/git-send-email.txt
+++ b/Documentation/git-send-email.txt
@@ -167,6 +167,14 @@ Sending
`sendemail.envelopeSender` configuration variable; if that is
unspecified, choosing the envelope sender is left to your MTA.
+--sendmail-cmd=<command>::
+ Specify a command to run to send the email. The command should
+ be sendmail-like; specifically, it must support the `-i` option.
+ The command will be executed in the shell if necessary. Default
+ is the value of `sendemail.sendmailcmd`. If unspecified, and if
+ --smtp-server is also unspecified, git-send-email will search
+ for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH.
+
--smtp-encryption=<encryption>::
Specify the encryption to use, either 'ssl' or 'tls'. Any other
value reverts to plain SMTP. Default is the value of
@@ -211,13 +219,16 @@ a password is obtained using 'git-credential'.
--smtp-server=<host>::
If set, specifies the outgoing SMTP server to use (e.g.
- `smtp.example.com` or a raw IP address). Alternatively it can
- specify a full pathname of a sendmail-like program instead;
- the program must support the `-i` option. Default value can
- be specified by the `sendemail.smtpServer` configuration
- option; the built-in default is to search for `sendmail` in
- `/usr/sbin`, `/usr/lib` and $PATH if such program is
- available, falling back to `localhost` otherwise.
+ `smtp.example.com` or a raw IP address). If unspecified, and if
+ `--sendmail-cmd` is also unspecified, the default is to search
+ for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH if such a
+ program is available, falling back to `localhost` otherwise.
++
+For backward compatibility, this option can also specify a full pathname
+of a sendmail-like program instead; the program must support the `-i`
+option. This method does not support passing arguments or using plain
+command names. For those use cases, consider using `--sendmail-cmd`
+instead.
--smtp-server-port=<port>::
Specifies a port different from the default port (SMTP
diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt
index a0eeaeb02e..fdcf43f87c 100644
--- a/Documentation/git-sparse-checkout.txt
+++ b/Documentation/git-sparse-checkout.txt
@@ -45,6 +45,20 @@ To avoid interfering with other worktrees, it first enables the
When `--cone` is provided, the `core.sparseCheckoutCone` setting is
also set, allowing for better performance with a limited set of
patterns (see 'CONE PATTERN SET' below).
++
+Use the `--[no-]sparse-index` option to toggle the use of the sparse
+index format. This reduces the size of the index to be more closely
+aligned with your sparse-checkout definition. This can have significant
+performance advantages for commands such as `git status` or `git add`.
+This feature is still experimental. Some commands might be slower with
+a sparse index until they are properly integrated with the feature.
++
+**WARNING:** Using a sparse index requires modifying the index in a way
+that is not completely understood by external tools. If you have trouble
+with this compatibility, then run `git sparse-checkout init --no-sparse-index`
+to rewrite your index to not be sparse. Older versions of Git will not
+understand the sparse directory entries index extension and may fail to
+interact with your repository until it is disabled.
'set'::
Write a set of patterns to the sparse-checkout file, as given as
diff --git a/Documentation/git-stash.txt b/Documentation/git-stash.txt
index a8c8c32f1e..be6084ccef 100644
--- a/Documentation/git-stash.txt
+++ b/Documentation/git-stash.txt
@@ -91,8 +91,10 @@ show [-u|--include-untracked|--only-untracked] [<diff-options>] [<stash>]::
By default, the command shows the diffstat, but it will accept any
format known to 'git diff' (e.g., `git stash show -p stash@{1}`
to view the second most recent entry in patch form).
- You can use stash.showIncludeUntracked, stash.showStat, and
- stash.showPatch config variables to change the default behavior.
+ If no `<diff-option>` is provided, the default behavior will be given
+ by the `stash.showStat` and `stash.showPatch` config variables. You
+ can also use `stash.showIncludeUntracked` to set whether
+ `--include-untracked` is enabled by default.
pop [--index] [-q|--quiet] [<stash>]::
diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt
index 67b143cc81..d5776ffcfd 100644
--- a/Documentation/git-svn.txt
+++ b/Documentation/git-svn.txt
@@ -1061,25 +1061,6 @@ with different name spaces. For example:
branches = stable/*:refs/remotes/svn/stable/*
branches = debug/*:refs/remotes/svn/debug/*
-BUGS
-----
-
-We ignore all SVN properties except svn:executable. Any unhandled
-properties are logged to $GIT_DIR/svn/<refname>/unhandled.log
-
-Renamed and copied directories are not detected by Git and hence not
-tracked when committing to SVN. I do not plan on adding support for
-this as it's quite difficult and time-consuming to get working for all
-the possible corner cases (Git doesn't do it, either). Committing
-renamed and copied files is fully supported if they're similar enough
-for Git to detect them.
-
-In SVN, it is possible (though discouraged) to commit changes to a tag
-(because a tag is just a directory copy, thus technically the same as a
-branch). When cloning an SVN repository, 'git svn' cannot know if such a
-commit to a tag will happen in the future. Thus it acts conservatively
-and imports all SVN tags as branches, prefixing the tag name with 'tags/'.
-
CONFIGURATION
-------------
@@ -1166,6 +1147,25 @@ $GIT_DIR/svn/\**/.rev_map.*::
if it is missing or not up to date. 'git svn reset' automatically
rewinds it.
+BUGS
+----
+
+We ignore all SVN properties except svn:executable. Any unhandled
+properties are logged to $GIT_DIR/svn/<refname>/unhandled.log
+
+Renamed and copied directories are not detected by Git and hence not
+tracked when committing to SVN. I do not plan on adding support for
+this as it's quite difficult and time-consuming to get working for all
+the possible corner cases (Git doesn't do it, either). Committing
+renamed and copied files is fully supported if they're similar enough
+for Git to detect them.
+
+In SVN, it is possible (though discouraged) to commit changes to a tag
+(because a tag is just a directory copy, thus technically the same as a
+branch). When cloning an SVN repository, 'git svn' cannot know if such a
+commit to a tag will happen in the future. Thus it acts conservatively
+and imports all SVN tags as branches, prefixing the tag name with 'tags/'.
+
SEE ALSO
--------
linkgit:git-rebase[1]
diff --git a/Documentation/git.txt b/Documentation/git.txt
index 3a9c44987f..6dd241ef83 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -13,7 +13,7 @@ SYNOPSIS
[--exec-path[=<path>]] [--html-path] [--man-path] [--info-path]
[-p|--paginate|-P|--no-pager] [--no-replace-objects] [--bare]
[--git-dir=<path>] [--work-tree=<path>] [--namespace=<name>]
- [--super-prefix=<path>] [--config-env <name>=<envvar>]
+ [--super-prefix=<path>] [--config-env=<name>=<envvar>]
<command> [<args>]
DESCRIPTION
@@ -670,6 +670,16 @@ for further details.
If this environment variable is set to `0`, git will not prompt
on the terminal (e.g., when asking for HTTP authentication).
+`GIT_CONFIG_GLOBAL`::
+`GIT_CONFIG_SYSTEM`::
+ Take the configuration from the given files instead of from global or
+ system-level configuration files. If `GIT_CONFIG_SYSTEM` is set, the
+ system config file defined at build time (usually `/etc/gitconfig`)
+ will not be read. Likewise, if `GIT_CONFIG_GLOBAL` is set, neither
+ `$HOME/.gitconfig` nor `$XDG_CONFIG_HOME/git/config` will be read. Can
+ be set to `/dev/null` to skip reading configuration files of the
+ respective level.
+
`GIT_CONFIG_NOSYSTEM`::
Whether to skip reading settings from the system-wide
`$(prefix)/etc/gitconfig` file. This environment variable can
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 0a60472bb5..83fd4e19a4 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -845,6 +845,8 @@ patterns are available:
- `rust` suitable for source code in the Rust language.
+- `scheme` suitable for source code in the Scheme language.
+
- `tex` suitable for source code for LaTeX documents.
@@ -1245,6 +1247,12 @@ to:
[attr]binary -diff -merge -text
------------
+NOTES
+-----
+
+Git does not follow symbolic links when accessing a `.gitattributes`
+file in the working tree. This keeps behavior consistent when the file
+is accessed from the index or a tree versus from the filesystem.
EXAMPLES
--------
diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt
index 5751603b13..53e7d5c914 100644
--- a/Documentation/gitignore.txt
+++ b/Documentation/gitignore.txt
@@ -149,6 +149,10 @@ not tracked by Git remain untracked.
To stop tracking a file that is currently tracked, use
'git rm --cached'.
+Git does not follow symbolic links when accessing a `.gitignore` file in
+the working tree. This keeps behavior consistent when the file is
+accessed from the index or a tree versus from the filesystem.
+
EXAMPLES
--------
diff --git a/Documentation/gitmailmap.txt b/Documentation/gitmailmap.txt
index 3fb39f801f..06f4af93fe 100644
--- a/Documentation/gitmailmap.txt
+++ b/Documentation/gitmailmap.txt
@@ -55,6 +55,13 @@ this would also match the 'Commit Name <commit&#64;email.xx>' above:
Proper Name <proper@email.xx> CoMmIt NaMe <CoMmIt@EmAiL.xX>
--
+NOTES
+-----
+
+Git does not follow symbolic links when accessing a `.mailmap` file in
+the working tree. This keeps behavior consistent when the file is
+accessed from the index or a tree versus from the filesystem.
+
EXAMPLES
--------
diff --git a/Documentation/gitmodules.txt b/Documentation/gitmodules.txt
index 8e333dde1b..dcee09b500 100644
--- a/Documentation/gitmodules.txt
+++ b/Documentation/gitmodules.txt
@@ -98,6 +98,14 @@ submodule.<name>.shallow::
shallow clone (with a history depth of 1) unless the user explicitly
asks for a non-shallow clone.
+NOTES
+-----
+
+Git does not allow the `.gitmodules` file within a working tree to be a
+symbolic link, and will refuse to check out such a tree entry. This
+keeps behavior consistent when the file is accessed from the index or a
+tree versus from the filesystem, and helps Git reliably enforce security
+checks of the file contents.
EXAMPLES
--------
diff --git a/Documentation/gitnamespaces.txt b/Documentation/gitnamespaces.txt
index b614969ad2..1c8d2ecc35 100644
--- a/Documentation/gitnamespaces.txt
+++ b/Documentation/gitnamespaces.txt
@@ -62,3 +62,7 @@ git clone ext::'git --namespace=foo %s /tmp/prefixed.git'
----------
include::transfer-data-leaks.txt[]
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Documentation/gitweb.conf.txt b/Documentation/gitweb.conf.txt
index 7963a79ba9..34b1d6e224 100644
--- a/Documentation/gitweb.conf.txt
+++ b/Documentation/gitweb.conf.txt
@@ -751,6 +751,17 @@ default font sizes or lineheights are changed (e.g. via adding extra
CSS stylesheet in `@stylesheets`), it may be appropriate to change
these values.
+email-privacy::
+ Redact e-mail addresses from the generated HTML, etc. content.
+ This obscures e-mail addresses retrieved from the author/committer
+ and comment sections of the Git log.
+ It is meant to hinder web crawlers that harvest and abuse addresses.
+ Such crawlers may not respect robots.txt.
+ Note that users and user tools also see the addresses as redacted.
+ If Gitweb is not the final step in a workflow then subsequent steps
+ may misbehave because of the redacted information they receive.
+ Disabled by default.
+
highlight::
Server-side syntax highlight support in "blob" view. It requires
`$highlight_bin` program to be available (see the description of
diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt
index 67c7a50b96..c077971335 100644
--- a/Documentation/glossary-content.txt
+++ b/Documentation/glossary-content.txt
@@ -146,8 +146,8 @@ current branch integrates with) obviously do not work, as there is no
<<def_revision,revision>> and you are "merging" another
<<def_branch,branch>>'s changes that happen to be a descendant of what
you have. In such a case, you do not make a new <<def_merge,merge>>
- <<def_commit,commit>> but instead just update to his
- revision. This will happen frequently on a
+ <<def_commit,commit>> but instead just update your branch to point at the same
+ revision as the branch you are merging. This will happen frequently on a
<<def_remote_tracking_branch,remote-tracking branch>> of a remote
<<def_repository,repository>>.
diff --git a/Documentation/lint-gitlink.perl b/Documentation/lint-gitlink.perl
index 476cc30b83..b22a367844 100755
--- a/Documentation/lint-gitlink.perl
+++ b/Documentation/lint-gitlink.perl
@@ -1,71 +1,67 @@
#!/usr/bin/perl
-use File::Find;
-use Getopt::Long;
+use strict;
+use warnings;
-my $basedir = ".";
-GetOptions("basedir=s" => \$basedir)
- or die("Cannot parse command line arguments\n");
+# Parse arguments, a simple state machine for input like:
+#
+# howto/*.txt config/*.txt --section=1 git.txt git-add.txt [...] --to-lint git-add.txt a-file.txt [...]
+my %TXT;
+my %SECTION;
+my $section;
+my $lint_these = 0;
+for my $arg (@ARGV) {
+ if (my ($sec) = $arg =~ /^--section=(\d+)$/s) {
+ $section = $sec;
+ next;
+ }
-my $found_errors = 0;
+ my ($name) = $arg =~ /^(.*?)\.txt$/s;
+ unless (defined $section) {
+ $TXT{$name} = $arg;
+ next;
+ }
-sub report {
- my ($where, $what, $error) = @_;
- print "$where: $error: $what\n";
- $found_errors = 1;
+ $SECTION{$name} = $section;
}
-sub grab_section {
- my ($page) = @_;
- open my $fh, "<", "$basedir/$page.txt";
- my $firstline = <$fh>;
- chomp $firstline;
- close $fh;
- my ($section) = ($firstline =~ /.*\((\d)\)$/);
- return $section;
+my $exit_code = 0;
+sub report {
+ my ($pos, $line, $target, $msg) = @_;
+ substr($line, $pos) = "' <-- HERE";
+ $line =~ s/^\s+//;
+ print "$ARGV:$.: error: $target: $msg, shown with 'HERE' below:\n";
+ print "$ARGV:$.:\t'$line\n";
+ $exit_code = 1;
}
-sub lint {
- my ($file) = @_;
- open my $fh, "<", $file
- or return;
- while (<$fh>) {
- my $where = "$file:$.";
- while (s/linkgit:((.*?)\[(\d)\])//) {
- my ($target, $page, $section) = ($1, $2, $3);
+@ARGV = sort values %TXT;
+die "BUG: Nothing to process!" unless @ARGV;
+while (<>) {
+ my $line = $_;
+ while ($line =~ m/linkgit:((.*?)\[(\d)\])/g) {
+ my $pos = pos $line;
+ my ($target, $page, $section) = ($1, $2, $3);
- # De-AsciiDoc
- $page =~ s/{litdd}/--/g;
+ # De-AsciiDoc
+ $page =~ s/{litdd}/--/g;
- if ($page !~ /^git/) {
- report($where, $target, "nongit link");
- next;
- }
- if (! -f "$basedir/$page.txt") {
- report($where, $target, "no such source");
- next;
- }
- $real_section = grab_section($page);
- if ($real_section != $section) {
- report($where, $target,
- "wrong section (should be $real_section)");
- next;
- }
+ if (!exists $TXT{$page}) {
+ report($pos, $line, $target, "link outside of our own docs");
+ next;
+ }
+ if (!exists $SECTION{$page}) {
+ report($pos, $line, $target, "link outside of our sectioned docs");
+ next;
+ }
+ my $real_section = $SECTION{$page};
+ if ($section != $SECTION{$page}) {
+ report($pos, $line, $target, "wrong section (should be $real_section)");
+ next;
}
}
- close $fh;
-}
-
-sub lint_it {
- lint($File::Find::name) if -f && /\.txt$/;
-}
-
-if (!@ARGV) {
- find({ wanted => \&lint_it, no_chdir => 1 }, $basedir);
-} else {
- for (@ARGV) {
- lint($_);
- }
+ # this resets our $. for each file
+ close ARGV if eof;
}
-exit $found_errors;
+exit $exit_code;
diff --git a/Documentation/lint-man-end-blurb.perl b/Documentation/lint-man-end-blurb.perl
new file mode 100755
index 0000000000..d69312e5db
--- /dev/null
+++ b/Documentation/lint-man-end-blurb.perl
@@ -0,0 +1,24 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+my $exit_code = 0;
+sub report {
+ my ($target, $msg) = @_;
+ print "error: $target: $msg\n";
+ $exit_code = 1;
+}
+
+local $/;
+while (my $slurp = <>) {
+ report($ARGV, "has no 'Part of the linkgit:git[1] suite' end blurb")
+ unless $slurp =~ m[
+ ^GIT\n
+ ---\n
+ \QPart of the linkgit:git[1] suite\E \n
+ \z
+ ]mx;
+}
+
+exit $exit_code;
diff --git a/Documentation/lint-man-section-order.perl b/Documentation/lint-man-section-order.perl
new file mode 100755
index 0000000000..b05f9156dd
--- /dev/null
+++ b/Documentation/lint-man-section-order.perl
@@ -0,0 +1,105 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+my %SECTIONS;
+{
+ my $order = 0;
+ %SECTIONS = (
+ 'NAME' => {
+ required => 1,
+ order => $order++,
+ },
+ 'SYNOPSIS' => {
+ required => 1,
+ order => $order++,
+ },
+ 'DESCRIPTION' => {
+ required => 1,
+ order => $order++,
+ },
+ 'OPTIONS' => {
+ order => $order++,
+ required => 0,
+ },
+ 'CONFIGURATION' => {
+ order => $order++,
+ },
+ 'BUGS' => {
+ order => $order++,
+ },
+ 'SEE ALSO' => {
+ order => $order++,
+ },
+ 'GIT' => {
+ required => 1,
+ order => $order++,
+ },
+ );
+}
+my $SECTION_RX = do {
+ my ($names) = join "|", keys %SECTIONS;
+ qr/^($names)$/s;
+};
+
+my $exit_code = 0;
+sub report {
+ my ($msg) = @_;
+ print "$ARGV:$.: $msg\n";
+ $exit_code = 1;
+}
+
+my $last_was_section;
+my @actual_order;
+while (my $line = <>) {
+ chomp $line;
+ if ($line =~ $SECTION_RX) {
+ push @actual_order => $line;
+ $last_was_section = 1;
+ # Have no "last" section yet, processing NAME
+ next if @actual_order == 1;
+
+ my @expected_order = sort {
+ $SECTIONS{$a}->{order} <=> $SECTIONS{$b}->{order}
+ } @actual_order;
+
+ my $expected_last = $expected_order[-2];
+ my $actual_last = $actual_order[-2];
+ if ($actual_last ne $expected_last) {
+ report("section '$line' incorrectly ordered, comes after '$actual_last'");
+ }
+ next;
+ }
+ if ($last_was_section) {
+ my $last_section = $actual_order[-1];
+ if (length $last_section ne length $line) {
+ report("dashes under '$last_section' should match its length!");
+ }
+ if ($line !~ /^-+$/) {
+ report("dashes under '$last_section' should be '-' dashes!");
+ }
+ $last_was_section = 0;
+ }
+
+ if (eof) {
+ # We have both a hash and an array to consider, for
+ # convenience
+ my %actual_sections;
+ @actual_sections{@actual_order} = ();
+
+ for my $section (sort keys %SECTIONS) {
+ next if !$SECTIONS{$section}->{required} or exists $actual_sections{$section};
+ report("has no required '$section' section!");
+ }
+
+ # Reset per-file state
+ {
+ @actual_order = ();
+ # this resets our $. for each file
+ close ARGV;
+ }
+ }
+}
+
+exit $exit_code;
diff --git a/Documentation/pretty-formats.txt b/Documentation/pretty-formats.txt
index 45133066e4..ef6bd420ae 100644
--- a/Documentation/pretty-formats.txt
+++ b/Documentation/pretty-formats.txt
@@ -190,6 +190,8 @@ The placeholders are:
'%ai':: author date, ISO 8601-like format
'%aI':: author date, strict ISO 8601 format
'%as':: author date, short format (`YYYY-MM-DD`)
+'%ah':: author date, human style (like the `--date=human` option of
+ linkgit:git-rev-list[1])
'%cn':: committer name
'%cN':: committer name (respecting .mailmap, see
linkgit:git-shortlog[1] or linkgit:git-blame[1])
@@ -206,6 +208,8 @@ The placeholders are:
'%ci':: committer date, ISO 8601-like format
'%cI':: committer date, strict ISO 8601 format
'%cs':: committer date, short format (`YYYY-MM-DD`)
+'%ch':: committer date, human style (like the `--date=human` option of
+ linkgit:git-rev-list[1])
'%d':: ref names, like the --decorate option of linkgit:git-log[1]
'%D':: ref names without the " (", ")" wrapping.
'%(describe[:options])':: human-readable name, like
@@ -267,7 +271,7 @@ endif::git-rev-list[]
`trailers` string may be followed by a colon
and zero or more comma-separated options.
If any option is provided multiple times the
- last occurance wins.
+ last occurrence wins.
+
The boolean options accept an optional value `[=<BOOL>]`. The values
`true`, `false`, `on`, `off` etc. are all accepted. See the "boolean"
diff --git a/Documentation/revisions.txt b/Documentation/revisions.txt
index d9169c062e..f5f17b65a1 100644
--- a/Documentation/revisions.txt
+++ b/Documentation/revisions.txt
@@ -260,6 +260,9 @@ any of the given commits.
A commit's reachable set is the commit itself and the commits in
its ancestry chain.
+There are several notations to specify a set of connected commits
+(called a "revision range"), illustrated below.
+
Commit Exclusions
~~~~~~~~~~~~~~~~~
@@ -294,6 +297,26 @@ is a shorthand for 'HEAD..origin' and asks "What did the origin do since
I forked from them?" Note that '..' would mean 'HEAD..HEAD' which is an
empty range that is both reachable and unreachable from HEAD.
+Commands that are specifically designed to take two distinct ranges
+(e.g. "git range-diff R1 R2" to compare two ranges) do exist, but
+they are exceptions. Unless otherwise noted, all "git" commands
+that operate on a set of commits work on a single revision range.
+In other words, writing two "two-dot range notation" next to each
+other, e.g.
+
+ $ git log A..B C..D
+
+does *not* specify two revision ranges for most commands. Instead
+it will name a single connected set of commits, i.e. those that are
+reachable from either B or D but are reachable from neither A nor C.
+In a linear history like this:
+
+ ---A---B---o---o---C---D
+
+because A and B are reachable from C, the revision range specified
+by these two dotted ranges is a single commit D.
+
+
Other <rev>{caret} Parent Shorthand Notations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Three other shorthands exist, particularly useful for merge commits,
diff --git a/Documentation/technical/api-error-handling.txt b/Documentation/technical/api-error-handling.txt
index ceeedd485c..8be4f4d0d6 100644
--- a/Documentation/technical/api-error-handling.txt
+++ b/Documentation/technical/api-error-handling.txt
@@ -1,8 +1,11 @@
Error reporting in git
======================
-`die`, `usage`, `error`, and `warning` report errors of various
-kinds.
+`BUG`, `die`, `usage`, `error`, and `warning` report errors of
+various kinds.
+
+- `BUG` is for failed internal assertions that should never happen,
+ i.e. a bug in git itself.
- `die` is for fatal application errors. It prints a message to
the user and exits with status 128.
@@ -20,6 +23,9 @@ kinds.
without running into too many problems. Like `error`, it
returns -1 after reporting the situation to the caller.
+These reports will be logged via the trace2 facility. See the "error"
+event in link:api-trace2.txt[trace2 API].
+
Customizable error handlers
---------------------------
diff --git a/Documentation/technical/api-trace2.txt b/Documentation/technical/api-trace2.txt
index c65ffafc48..3f52f981a2 100644
--- a/Documentation/technical/api-trace2.txt
+++ b/Documentation/technical/api-trace2.txt
@@ -465,7 +465,7 @@ completed.)
------------
`"error"`::
- This event is emitted when one of the `error()`, `die()`,
+ This event is emitted when one of the `BUG()`, `error()`, `die()`,
`warning()`, or `usage()` functions are called.
+
------------
diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt
index d363a71c37..65da0daaa5 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -44,6 +44,13 @@ Git index format
localization, no special casing of directory separator '/'). Entries
with the same name are sorted by their stage field.
+ An index entry typically represents a file. However, if sparse-checkout
+ is enabled in cone mode (`core.sparseCheckoutCone` is enabled) and the
+ `extensions.sparseIndex` extension is enabled, then the index may
+ contain entries for directories outside of the sparse-checkout definition.
+ These entries have mode `040000`, include the `SKIP_WORKTREE` bit, and
+ the path ends in a directory separator.
+
32-bit ctime seconds, the last time a file's metadata changed
this is stat(2) data
@@ -385,3 +392,15 @@ The remaining data of each directory block is grouped by type:
in this block of entries.
- 32-bit count of cache entries in this block
+
+== Sparse Directory Entries
+
+ When using sparse-checkout in cone mode, some entire directories within
+ the index can be summarized by pointing to a tree object instead of the
+ entire expanded list of paths within that tree. An index containing such
+ entries is a "sparse index". Index format versions 4 and less were not
+ implemented with such entries in mind. Thus, for these versions, an
+ index containing sparse directory entries will include this extension
+ with signature { 's', 'd', 'i', 'r' }. Like the split-index extension,
+ tools should avoid interacting with a sparse index unless they understand
+ this extension.
diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt
index f7eabc6c76..1eb525fe76 100644
--- a/Documentation/technical/packfile-uri.txt
+++ b/Documentation/technical/packfile-uri.txt
@@ -35,13 +35,14 @@ include some sort of non-trivial implementation in the Minimum Viable Product,
at least so that we can test the client.
This is the implementation: a feature, marked experimental, that allows the
-server to be configured by one or more `uploadpack.blobPackfileUri=<sha1>
-<uri>` entries. Whenever the list of objects to be sent is assembled, all such
-blobs are excluded, replaced with URIs. As noted in "Future work" below, the
-server can evolve in the future to support excluding other objects (or other
-implementations of servers could be made that support excluding other objects)
-without needing a protocol change, so clients should not expect that packfiles
-downloaded in this way only contain single blobs.
+server to be configured by one or more `uploadpack.blobPackfileUri=
+<object-hash> <pack-hash> <uri>` entries. Whenever the list of objects to be
+sent is assembled, all such blobs are excluded, replaced with URIs. As noted
+in "Future work" below, the server can evolve in the future to support
+excluding other objects (or other implementations of servers could be made
+that support excluding other objects) without needing a protocol change, so
+clients should not expect that packfiles downloaded in this way only contain
+single blobs.
Client design
-------------
diff --git a/Documentation/technical/parallel-checkout.txt b/Documentation/technical/parallel-checkout.txt
new file mode 100644
index 0000000000..e790258a1a
--- /dev/null
+++ b/Documentation/technical/parallel-checkout.txt
@@ -0,0 +1,270 @@
+Parallel Checkout Design Notes
+==============================
+
+The "Parallel Checkout" feature attempts to use multiple processes to
+parallelize the work of uncompressing the blobs, applying in-core
+filters, and writing the resulting contents to the working tree during a
+checkout operation. It can be used by all checkout-related commands,
+such as `clone`, `checkout`, `reset`, `sparse-checkout`, and others.
+
+These commands share the following basic structure:
+
+* Step 1: Read the current index file into memory.
+
+* Step 2: Modify the in-memory index based upon the command, and
+ temporarily mark all cache entries that need to be updated.
+
+* Step 3: Populate the working tree to match the new candidate index.
+ This includes iterating over all of the to-be-updated cache entries
+ and deleting, creating, or overwriting the associated files in the working
+ tree.
+
+* Step 4: Write the new index to disk.
+
+Step 3 is the focus of the "parallel checkout" effort described here.
+
+Sequential Implementation
+-------------------------
+
+For the purposes of discussion here, the current sequential
+implementation of Step 3 is divided into 3 parts, each one implemented in
+its own function:
+
+* Step 3a: `unpack-trees.c:check_updates()` contains a series of
+ sequential loops iterating over the `cache_entry`'s array. The main
+ loop in this function calls the Step 3b function for each of the
+ to-be-updated entries.
+
+* Step 3b: `entry.c:checkout_entry()` examines the existing working tree
+ for file conflicts, collisions, and unsaved changes. It removes files
+ and creates leading directories as necessary. It calls the Step 3c
+ function for each entry to be written.
+
+* Step 3c: `entry.c:write_entry()` loads the blob into memory, smudges
+ it if necessary, creates the file in the working tree, writes the
+ smudged contents, calls `fstat()` or `lstat()`, and updates the
+ associated `cache_entry` struct with the stat information gathered.
+
+It wouldn't be safe to perform Step 3b in parallel, as there could be
+race conditions between file creations and removals. Instead, the
+parallel checkout framework lets the sequential code handle Step 3b,
+and uses parallel workers to replace the sequential
+`entry.c:write_entry()` calls from Step 3c.
+
+Rejected Multi-Threaded Solution
+--------------------------------
+
+The most "straightforward" implementation would be to spread the set of
+to-be-updated cache entries across multiple threads. But due to the
+thread-unsafe functions in the ODB code, we would have to use locks to
+coordinate the parallel operation. An early prototype of this solution
+showed that the multi-threaded checkout would bring performance
+improvements over the sequential code, but there was still too much lock
+contention. A `perf` profiling indicated that around 20% of the runtime
+during a local Linux clone (on an SSD) was spent in locking functions.
+For this reason this approach was rejected in favor of using multiple
+child processes, which led to a better performance.
+
+Multi-Process Solution
+----------------------
+
+Parallel checkout alters the aforementioned Step 3 to use multiple
+`checkout--worker` background processes to distribute the work. The
+long-running worker processes are controlled by the foreground Git
+command using the existing run-command API.
+
+Overview
+~~~~~~~~
+
+Step 3b is only slightly altered; for each entry to be checked out, the
+main process performs the following steps:
+
+* M1: Check whether there is any untracked or unclean file in the
+ working tree which would be overwritten by this entry, and decide
+ whether to proceed (removing the file(s)) or not.
+
+* M2: Create the leading directories.
+
+* M3: Load the conversion attributes for the entry's path.
+
+* M4: Check, based on the entry's type and conversion attributes,
+ whether the entry is eligible for parallel checkout (more on this
+ later). If it is eligible, enqueue the entry and the loaded
+ attributes to later write the entry in parallel. If not, write the
+ entry right away, using the default sequential code.
+
+Note: we save the conversion attributes associated with each entry
+because the workers don't have access to the main process' index state,
+so they can't load the attributes by themselves (and the attributes are
+needed to properly smudge the entry). Additionally, this has a positive
+impact on performance as (1) we don't need to load the attributes twice
+and (2) the attributes machinery is optimized to handle paths in
+sequential order.
+
+After all entries have passed through the above steps, the main process
+checks if the number of enqueued entries is sufficient to spread among
+the workers. If not, it just writes them sequentially. Otherwise, it
+spawns the workers and distributes the queued entries uniformly in
+continuous chunks. This aims to minimize the chances of two workers
+writing to the same directory simultaneously, which could increase lock
+contention in the kernel.
+
+Then, for each assigned item, each worker:
+
+* W1: Checks if there is any non-directory file in the leading part of
+ the entry's path or if there already exists a file at the entry's path.
+ If so, mark the entry with `PC_ITEM_COLLIDED` and skip it (more on
+ this later).
+
+* W2: Creates the file (with O_CREAT and O_EXCL).
+
+* W3: Loads the blob into memory (inflating and delta reconstructing
+ it).
+
+* W4: Applies any required in-process filter, like end-of-line
+ conversion and re-encoding.
+
+* W5: Writes the result to the file descriptor opened at W2.
+
+* W6: Calls `fstat()` or `lstat()` on the just-written path, and sends
+ the result back to the main process, together with the end status of
+ the operation and the item's identification number.
+
+Note that, when possible, steps W3 to W5 are delegated to the streaming
+machinery, removing the need to keep the entire blob in memory.
+
+If the worker fails to read the blob or to write it to the working tree,
+it removes the created file to avoid leaving empty files behind. This is
+the *only* time a worker is allowed to remove a file.
+
+As mentioned earlier, it is the responsibility of the main process to
+remove any file that blocks the checkout operation (or abort if the
+removal(s) would cause data loss and the user didn't ask to `--force`).
+This is crucial to avoid race conditions and also to properly detect
+path collisions at Step W1.
+
+After the workers finish writing the items and sending back the required
+information, the main process handles the results in two steps:
+
+- First, it updates the in-memory index with the `lstat()` information
+ sent by the workers. (This must be done first as this information
+ might be required in the following step.)
+
+- Then it writes the items which collided on disk (i.e. items marked
+ with `PC_ITEM_COLLIDED`). More on this below.
+
+Path Collisions
+---------------
+
+Path collisions happen when two different paths correspond to the same
+entry in the file system. E.g. the paths 'a' and 'A' would collide in a
+case-insensitive file system.
+
+The sequential checkout deals with collisions in the same way that it
+deals with files that were already present in the working tree before
+checkout. Basically, it checks if the path that it wants to write
+already exists on disk, makes sure the existing file doesn't have
+unsaved data, and then overwrites it. (To be more pedantic: it deletes
+the existing file and creates the new one.) So, if there are multiple
+colliding files to be checked out, the sequential code will write each
+one of them but only the last will actually survive on disk.
+
+Parallel checkout aims to reproduce the same behavior. However, we
+cannot let the workers racily write to the same file on disk. Instead,
+the workers detect when the entry that they want to check out would
+collide with an existing file, and mark it with `PC_ITEM_COLLIDED`.
+Later, the main process can sequentially feed these entries back to
+`checkout_entry()` without the risk of race conditions. On clone, this
+also has the effect of marking the colliding entries to later emit a
+warning for the user, like the classic sequential checkout does.
+
+The workers are able to detect both collisions among the entries being
+concurrently written and collisions between a parallel-eligible entry
+and an ineligible entry. The general idea for collision detection is
+quite straightforward: for each parallel-eligible entry, the main
+process must remove all files that prevent this entry from being written
+(before enqueueing it). This includes any non-directory file in the
+leading path of the entry. Later, when a worker gets assigned the entry,
+it looks again for the non-directory files and for an already existing
+file at the entry's path. If any of these checks finds something, the
+worker knows that there was a path collision.
+
+Because parallel checkout can distinguish path collisions from the case
+where the file was already present in the working tree before checkout,
+we could alternatively choose to skip the checkout of colliding entries.
+However, each entry that doesn't get written would have NULL `lstat()`
+fields on the index. This could cause performance penalties for
+subsequent commands that need to refresh the index, as they would have
+to go to the file system to see if the entry is dirty. Thus, if we have
+N entries in a colliding group and we decide to write and `lstat()` only
+one of them, every subsequent `git-status` will have to read, convert,
+and hash the written file N - 1 times. By checking out all colliding
+entries (like the sequential code does), we only pay the overhead once,
+during checkout.
+
+Eligible Entries for Parallel Checkout
+--------------------------------------
+
+As previously mentioned, not all entries passed to `checkout_entry()`
+will be considered eligible for parallel checkout. More specifically, we
+exclude:
+
+- Symbolic links; to avoid race conditions that, in combination with
+ path collisions, could cause workers to write files at the wrong
+ place. For example, if we were to concurrently check out a symlink
+ 'a' -> 'b' and a regular file 'A/f' in a case-insensitive file system,
+ we could potentially end up writing the file 'A/f' at 'a/f', due to a
+ race condition.
+
+- Regular files that require external filters (either "one shot" filters
+ or long-running process filters). These filters are black-boxes to Git
+ and may have their own internal locking or non-concurrent assumptions.
+ So it might not be safe to run multiple instances in parallel.
++
+Besides, long-running filters may use the delayed checkout feature to
+postpone the return of some filtered blobs. The delayed checkout queue
+and the parallel checkout queue are not compatible and should remain
+separate.
++
+Note: regular files that only require internal filters, like end-of-line
+conversion and re-encoding, are eligible for parallel checkout.
+
+Ineligible entries are checked out by the classic sequential codepath
+*before* spawning workers.
+
+Note: submodules' files are also eligible for parallel checkout (as
+long as they don't fall into any of the excluding categories mentioned
+above). But since each submodule is checked out in its own child
+process, we don't mix the superproject's and the submodules' files in
+the same parallel checkout process or queue.
+
+The API
+-------
+
+The parallel checkout API was designed with the goal of minimizing
+changes to the current users of the checkout machinery. This means that
+they don't have to call a different function for sequential or parallel
+checkout. As already mentioned, `checkout_entry()` will automatically
+insert the given entry in the parallel checkout queue when this feature
+is enabled and the entry is eligible; otherwise, it will just write the
+entry right away, using the sequential code. In general, callers of the
+parallel checkout API should look similar to this:
+
+----------------------------------------------
+int pc_workers, pc_threshold, err = 0;
+struct checkout state;
+
+get_parallel_checkout_configs(&pc_workers, &pc_threshold);
+
+/*
+ * This check is not strictly required, but it
+ * should save some time in sequential mode.
+ */
+if (pc_workers > 1)
+ init_parallel_checkout();
+
+for (each cache_entry ce to-be-updated)
+ err |= checkout_entry(ce, &state, NULL, NULL);
+
+err |= run_parallel_checkout(&state, pc_workers, pc_threshold, NULL, NULL);
+----------------------------------------------
diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt
index a7c806a73e..a1e31367f4 100644
--- a/Documentation/technical/protocol-v2.txt
+++ b/Documentation/technical/protocol-v2.txt
@@ -346,6 +346,14 @@ explained below.
client should download from all given URIs. Currently, the
protocols supported are "http" and "https".
+If the 'wait-for-done' feature is advertised, the following argument
+can be included in the client's request.
+
+ wait-for-done
+ Indicates to the server that it should never send "ready", but
+ should wait for the client to say "done" before sending the
+ packfile.
+
The response of `fetch` is broken into a number of sections separated by
delimiter packets (0001), with each section beginning with its section
header. Most sections are sent only when the packfile is sent.
@@ -514,3 +522,34 @@ packet-line, and must not contain non-printable or whitespace characters. The
current implementation uses trace2 session IDs (see
link:api-trace2.html[api-trace2] for details), but this may change and users of
the session ID should not rely on this fact.
+
+object-info
+~~~~~~~~~~~
+
+`object-info` is the command to retrieve information about one or more objects.
+Its main purpose is to allow a client to make decisions based on this
+information without having to fully fetch objects. Object size is the only
+information that is currently supported.
+
+An `object-info` request takes the following arguments:
+
+ size
+ Requests size information to be returned for each listed object id.
+
+ oid <oid>
+ Indicates to the server an object which the client wants to obtain
+ information for.
+
+The response of `object-info` is a list of the requested object ids
+and associated requested information, each separated by a single space.
+
+ output = info flush-pkt
+
+ info = PKT-LINE(attrs LF)
+ *PKT-LINE(obj-info LF)
+
+ attrs = attr | attrs SP attrs
+
+ attr = "size"
+
+ obj-info = obj-id SP obj-size
diff --git a/Documentation/technical/reftable.txt b/Documentation/technical/reftable.txt
index 3ef169af27..d7c3b645cf 100644
--- a/Documentation/technical/reftable.txt
+++ b/Documentation/technical/reftable.txt
@@ -1011,8 +1011,13 @@ reftable stack, reload `tables.list`, and delete any tables no longer mentioned
in `tables.list`.
Irregular program exit may still leave about unused files. In this case, a
-cleanup operation can read `tables.list`, note its modification timestamp, and
-delete any unreferenced `*.ref` files that are older.
+cleanup operation should proceed as follows:
+
+* take a lock `tables.list.lock` to prevent concurrent modifications
+* refresh the reftable stack, by reading `tables.list`
+* for each `*.ref` file, remove it if
+** it is not mentioned in `tables.list`, and
+** its max update_index is not beyond the max update_index of the stack
Alternatives considered
diff --git a/Documentation/technical/remembering-renames.txt b/Documentation/technical/remembering-renames.txt
new file mode 100644
index 0000000000..2fd5cc88e0
--- /dev/null
+++ b/Documentation/technical/remembering-renames.txt
@@ -0,0 +1,671 @@
+Rebases and cherry-picks involve a sequence of merges whose results are
+recorded as new single-parent commits. The first parent side of those
+merges represents the "upstream" side, and often includes a far larger set of
+changes than the second parent side. Traditionally, the renames on the
+first-parent side of that sequence of merges were repeatedly re-detected
+for every merge. This file explains why it is safe and effective during
+rebases and cherry-picks to remember renames on the upstream side of
+history as an optimization, assuming all merges are automatic and clean
+(i.e. no conflicts and not interrupted for user input or editing).
+
+Outline:
+
+ 0. Assumptions
+
+ 1. How rebasing and cherry-picking work
+
+ 2. Why the renames on MERGE_SIDE1 in any given pick are *always* a
+ superset of the renames on MERGE_SIDE1 for the next pick.
+
+ 3. Why any rename on MERGE_SIDE1 in any given pick is _almost_ always also
+ a rename on MERGE_SIDE1 for the next pick
+
+ 4. A detailed description of the counter-examples to #3.
+
+ 5. Why the special cases in #4 are still fully reasonable to use to pair
+ up files for three-way content merging in the merge machinery, and why
+ they do not affect the correctness of the merge.
+
+ 6. Interaction with skipping of "irrelevant" renames
+
+ 7. Additional items that need to be cached
+
+ 8. How directory rename detection interacts with the above and why this
+ optimization is still safe even if merge.directoryRenames is set to
+ "true".
+
+
+=== 0. Assumptions ===
+
+There are two assumptions that will hold throughout this document:
+
+ * The upstream side where commits are transplanted to is treated as the
+ first parent side when rebase/cherry-pick call the merge machinery
+
+ * All merges are fully automatic
+
+and a third that will hold in sections 2-5 for simplicity, that I'll later
+address in section 8:
+
+ * No directory renames occur
+
+
+Let me explain more about each assumption and why I include it:
+
+
+The first assumption is merely for the purposes of making this document
+clearer; the optimization implementation does not actually depend upon it.
+However, the assumption does hold in all cases because it reflects the way
+that both rebase and cherry-pick were implemented; and the implementations
+of cherry-pick and rebase are not readily changeable for backwards
+compatibility reasons (see for example the discussion of the --ours and
+--theirs flag in the documentation of `git checkout`, particularly the
+comments about how they behave with rebase). The optimization avoids
+checking first-parent-ness, though. It checks the conditions that make the
+optimization valid instead, so it would still continue working if someone
+changed the parent ordering that cherry-pick and rebase use. But making
+this assumption does make this document much clearer and prevents me from
+having to repeat every example twice.
+
+If the second assumption is violated, then the optimization simply is
+turned off and thus isn't relevant to consider. The second assumption can
+also be stated as "there is no interruption for a user to resolve conflicts
+or to just further edit or tweak files". While real rebases and
+cherry-picks are often interrupted (either because it's an interactive
+rebase where the user requested to stop and edit, or because there were
+conflicts that the user needs to resolve), the cache of renames is not
+stored on disk, and thus is thrown away as soon as the rebase or
+cherry-pick stops for the user to resolve the operation.
+
+The third assumption makes sections 2-5 simpler, and allows people to
+understand the basics of why this optimization is safe and effective, and
+then I can go back and address the specifics in section 8. It is probably
+also worth noting that if directory renames do occur, then the default of
+merge.directoryRenames being set to "conflict" means that the operation
+will stop for users to resolve the conflicts and the cache will be thrown
+away, and thus that there won't be an optimization to apply. So, the only
+reason we need to address directory renames specifically, is that some
+users will have set merge.directoryRenames to "true" to allow the merges to
+continue to proceed automatically. The optimization is still safe with
+this config setting, but we have to discuss a few more cases to show why;
+this discussion is deferred until section 8.
+
+
+=== 1. How rebasing and cherry-picking work ===
+
+Consider the following setup (from the git-rebase manpage):
+
+ A---B---C topic
+ /
+ D---E---F---G main
+
+After rebasing or cherry-picking topic onto main, this will appear as:
+
+ A'--B'--C' topic
+ /
+ D---E---F---G main
+
+The way the commits A', B', and C' are created is through a series of
+merges, where rebase or cherry-pick sequentially uses each of the three
+A-B-C commits in a special merge operation. Let's label the three commits
+in the merge operation as MERGE_BASE, MERGE_SIDE1, and MERGE_SIDE2. For
+this picture, the three commits for each of the three merges would be:
+
+To create A':
+ MERGE_BASE: E
+ MERGE_SIDE1: G
+ MERGE_SIDE2: A
+
+To create B':
+ MERGE_BASE: A
+ MERGE_SIDE1: A'
+ MERGE_SIDE2: B
+
+To create C':
+ MERGE_BASE: B
+ MERGE_SIDE1: B'
+ MERGE_SIDE2: C
+
+Sometimes, folks are surprised that these three-way merges are done. It
+can be useful in understanding these three-way merges to view them in a
+slightly different light. For example, in creating C', you can view it as
+either:
+
+ * Apply the changes between B & C to B'
+ * Apply the changes between B & B' to C
+
+Conceptually the two statements above are the same as a three-way merge of
+B, B', and C, at least the parts before you decide to record a commit.
+
+
+=== 2. Why the renames on MERGE_SIDE1 in any given pick are always a ===
+=== superset of the renames on MERGE_SIDE1 for the next pick. ===
+
+The merge machinery uses the filenames it is fed from MERGE_BASE,
+MERGE_SIDE1, and MERGE_SIDE2. It will only move content to a different
+filename under one of three conditions:
+
+ * To make both pieces of a conflict available to a user during conflict
+ resolution (examples: directory/file conflict, add/add type conflict
+ such as symlink vs. regular file)
+
+ * When MERGE_SIDE1 renames the file.
+
+ * When MERGE_SIDE2 renames the file.
+
+First, let's remember what commits are involved in the first and second
+picks of the cherry-pick or rebase sequence:
+
+To create A':
+ MERGE_BASE: E
+ MERGE_SIDE1: G
+ MERGE_SIDE2: A
+
+To create B':
+ MERGE_BASE: A
+ MERGE_SIDE1: A'
+ MERGE_SIDE2: B
+
+So, in particular, we need to show that the renames between E and G are a
+superset of those between A and A'.
+
+A' is created by the first merge. A' will only have renames for one of the
+three reasons listed above. The first case, a conflict, results in a
+situation where the cache is dropped and thus this optimization doesn't
+take effect, so we need not consider that case. The third case, a rename
+on MERGE_SIDE2 (i.e. from E to A), will show up in A' but it also shows up
+in A -- therefore when diffing A and A' that path does not show up as a
+rename. The only remaining way for renames to show up in A' is for the
+rename to come from MERGE_SIDE1. Therefore, all renames between A and A'
+are a subset of those between E and G. Equivalently, all renames between E
+and G are a superset of those between A and A'.
+
+
+=== 3. Why any rename on MERGE_SIDE1 in any given pick is _almost_ ===
+=== always also a rename on MERGE_SIDE1 for the next pick. ===
+
+Let's again look at the first two picks:
+
+To create A':
+ MERGE_BASE: E
+ MERGE_SIDE1: G
+ MERGE_SIDE2: A
+
+To create B':
+ MERGE_BASE: A
+ MERGE_SIDE1: A'
+ MERGE_SIDE2: B
+
+Now let's look at any given rename from MERGE_SIDE1 of the first pick, i.e.
+any given rename from E to G. Let's use the filenames 'oldfile' and
+'newfile' for demonstration purposes. That first pick will function as
+follows; when the rename is detected, the merge machinery will do a
+three-way content merge of the following:
+ E:oldfile
+ G:newfile
+ A:oldfile
+and produce a new result:
+ A':newfile
+
+Note above that I've assumed that E->A did not rename oldfile. If that
+side did rename, then we most likely have a rename/rename(1to2) conflict
+that will cause the rebase or cherry-pick operation to halt and drop the
+in-memory cache of renames and thus doesn't need to be considered further.
+In the special case that E->A does rename the file but also renames it to
+newfile, then there is no conflict from the renaming and the merge can
+succeed. In this special case, the rename is not valid to cache because
+the second merge will find A:newfile in the MERGE_BASE (see also the new
+testcases in t6429 with "rename same file identically" in their
+description). So a rename/rename(1to1) needs to be specially handled by
+pruning renames from the cache and decrementing the dir_rename_counts in
+the current and leading directories associated with those renames. Or,
+since these are really rare, one could just take the easy way out and
+disable the remembering renames optimization when a rename/rename(1to1)
+happens.
+
+The previous paragraph handled the cases for E->A renaming oldfile, let's
+continue assuming that oldfile is not renamed in A.
+
+As per the diagram for creating B', MERGE_SIDE1 involves the changes from A
+to A'. So, we are curious whether A:oldfile and A':newfile will be viewed
+as renames. Note that:
+
+ * There will be no A':oldfile (because there could not have been a
+ G:oldfile as we do not do break detection in the merge machinery and
+ G:newfile was detected as a rename, and by the construction of the
+ rename above that merged cleanly, the merge machinery will ensure there
+ is no 'oldfile' in the result).
+
+ * There will be no A:newfile (if there had been, we would have had a
+ rename/add conflict).
+
+ * Clearly A:oldfile and A':newfile are "related" (A':newfile came from a
+ clean three-way content merge involving A:oldfile).
+
+We can also expound on the third point above, by noting that three-way
+content merges can also be viewed as applying the differences between the
+base and one side to the other side. Thus we can view A':newfile as
+having been created by taking the changes between E:oldfile and G:newfile
+(which were detected as being related, i.e. <50% changed) and applying
+them to A:oldfile.
+
+Thus A:oldfile and A':newfile are just as related as E:oldfile and
+G:newfile are -- they have exactly identical differences. Since the latter
+were detected as renames, A:oldfile and A':newfile should also be
+detectable as renames almost always.
+
+
+=== 4. A detailed description of the counter-examples to #3. ===
+
+We already noted in section 3 that rename/rename(1to1) (i.e. both sides
+renaming a file the same way) was one counter-example. The more
+interesting bit, though, is why did we need to use the "almost" qualifier
+when stating that A:oldfile and A':newfile are "almost" always detectable
+as renames?
+
+Let's repeat an earlier point that section 3 made:
+
+ A':newfile was created by applying the changes between E:oldfile and
+ G:newfile to A:oldfile. The changes between E:oldfile and G:newfile were
+ <50% of the size of E:oldfile.
+
+If those changes that were <50% of the size of E:oldfile are also <50% of
+the size of A:oldfile, then A:oldfile and A':newfile will be detectable as
+renames. However, if there is a dramatic size reduction between E:oldfile
+and A:oldfile (but the changes between E:oldfile, G:newfile, and A:oldfile
+still somehow merge cleanly), then traditional rename detection would not
+detect A:oldfile and A':newfile as renames.
+
+Here's an example where that can happen:
+ * E:oldfile had 20 lines
+ * G:newfile added 10 new lines at the beginning of the file
+ * A:oldfile kept the first 3 lines of the file, and deleted all the rest
+then
+ => A':newfile would have 13 lines, 3 of which match those in A:oldfile.
+E:oldfile -> G:newfile would be detected as a rename, but A:oldfile and
+A':newfile would not be.
+
+
+=== 5. Why the special cases in #4 are still fully reasonable to use to ===
+=== pair up files for three-way content merging in the merge machinery, ===
+=== and why they do not affect the correctness of the merge. ===
+
+In the rename/rename(1to1) case, A:newfile and A':newfile are not renames
+since they use the *same* filename. However, files with the same filename
+are obviously fine to pair up for three-way content merging (the merge
+machinery has never employed break detection). The interesting
+counter-example case is thus not the rename/rename(1to1) case, but the case
+where A did not rename oldfile. That was the case that we spent most of
+the time discussing in sections 3 and 4. The remainder of this section
+will be devoted to that case as well.
+
+So, even if A:oldfile and A':newfile aren't detectable as renames, why is
+it still reasonable to pair them up for three-way content merging in the
+merge machinery? There are multiple reasons:
+
+ * As noted in sections 3 and 4, the diff between A:oldfile and A':newfile
+ is *exactly* the same as the diff between E:oldfile and G:newfile. The
+ latter pair were detected as renames, so it seems unlikely to surprise
+ users for us to treat A:oldfile and A':newfile as renames.
+
+ * In fact, "oldfile" and "newfile" were at one point detected as renames
+ due to how they were constructed in the E..G chain. And we used that
+ information once already in this rebase/cherry-pick. I think users
+ would be unlikely to be surprised at us continuing to treat the files
+ as renames and would quickly understand why we had done so.
+
+ * Marking or declaring files as renames is *not* the end goal for merges.
+ Merges use renames to determine which files make sense to be paired up
+ for three-way content merges.
+
+ * A:oldfile and A':newfile were _already_ paired up in a three-way
+ content merge; that is how A':newfile was created. In fact, that
+ three-way content merge was clean. So using them again in a later
+ three-way content merge seems very reasonable.
+
+However, the above is focusing on the common scenarios. Let's try to look
+at all possible unusual scenarios and compare without the optimization to
+with the optimization. Consider the following theoretical cases; we will
+then dive into each to determine which of them are possible,
+and if so, what they mean:
+
+ 1. Without the optimization, the second merge results in a conflict.
+ With the optimization, the second merge also results in a conflict.
+ Questions: Are the conflicts confusingly different? Better in one case?
+
+ 2. Without the optimization, the second merge results in NO conflict.
+ With the optimization, the second merge also results in NO conflict.
+ Questions: Are the merges the same?
+
+ 3. Without the optimization, the second merge results in a conflict.
+ With the optimization, the second merge results in NO conflict.
+ Questions: Possible? Bug, bugfix, or something else?
+
+ 4. Without the optimization, the second merge results in NO conflict.
+ With the optimization, the second merge results in a conflict.
+ Questions: Possible? Bug, bugfix, or something else?
+
+I'll consider all four cases, but out of order.
+
+The fourth case is impossible. For the code without the remembering
+renames optimization to not get a conflict, B:oldfile would need to exactly
+match A:oldfile -- if it doesn't, there would be a modify/delete conflict.
+If A:oldfile matches B:oldfile exactly, then a three-way content merge
+between A:oldfile, A':newfile, and B:oldfile would have no conflict and
+just give us the version of newfile from A' as the result.
+
+From the same logic as the above paragraph, the second case would indeed
+result in identical merges. When A:oldfile exactly matches B:oldfile, an
+undetected rename would say, "Oh, I see one side didn't modify 'oldfile'
+and the other side deleted it. I'll delete it. And I see you have this
+brand new file named 'newfile' in A', so I'll keep it." That gives the
+same results as three-way content merging A:oldfile, A':newfile, and
+B:oldfile -- a removal of oldfile with the version of newfile from A'
+showing up in the result.
+
+The third case is interesting. It means that A:oldfile and A':newfile were
+not just similar enough, but that the changes between them did not conflict
+with the changes between A:oldfile and B:oldfile. This would validate our
+hunch that the files were similar enough to be used in a three-way content
+merge, and thus seems entirely correct for us to have used them that way.
+(Sidenote: One particular example here may be enlightening. Let's say that
+B was an immediate revert of A. B clearly would have been a clean revert
+of A, since A was B's immediate parent. One would assume that if you can
+pick a commit, you should also be able to cherry-pick its immediate revert.
+However, this is one of those funny corner cases; without this
+optimization, we just successfully picked a commit cleanly, but we are
+unable to cherry-pick its immediate revert due to the size differences
+between E:oldfile and A:oldfile.)
+
+That leaves only the first case to consider -- when we get conflicts both
+with or without the optimization. Without the optimization, we'll have a
+modify/delete conflict, where both A':newfile and B:oldfile are left in the
+tree for the user to deal with and no hints about the potential similarity
+between the two. With the optimization, we'll have a three-way content
+merged A:oldfile, A':newfile, and B:oldfile with conflict markers
+suggesting we thought the files were related but giving the user the chance
+to resolve. As noted above, I don't think users will find us treating
+'oldfile' and 'newfile' as related as a surprise since they were between E
+and G. In any event, though, this case shouldn't be concerning since we
+hit a conflict in both cases, told the user what we know, and asked them to
+resolve it.
+
+So, in summary, case 4 is impossible, case 2 yields the same behavior, and
+cases 1 and 3 seem to provide as good or better behavior with the
+optimization than without.
+
+
+=== 6. Interaction with skipping of "irrelevant" renames ===
+
+Previous optimizations involved skipping rename detection for paths
+considered to be "irrelevant". See for example the following commits:
+
+ * 32a56dfb99 ("merge-ort: precompute subset of sources for which we
+ need rename detection", 2021-03-11)
+ * 2fd9eda462 ("merge-ort: precompute whether directory rename
+ detection is needed", 2021-03-11)
+ * 9bd342137e ("diffcore-rename: determine which relevant_sources are
+ no longer relevant", 2021-03-13)
+
+Relevance is always determined by what the _other_ side of history has
+done, in terms of modifying a file that our side renamed, or adding a
+file to a directory which our side renamed. This means that a path
+that is "irrelevant" when picking the first commit of a series in a
+rebase or cherry-pick, may suddenly become "relevant" when picking the
+next commit.
+
+The upshot of this is that we can only cache rename detection results
+for relevant paths, and need to re-check relevance in subsequent
+commits. If those subsequent commits have additional paths that are
+relevant for rename detection, then we will need to redo rename
+detection -- though we can limit it to the paths for which we have not
+already detected renames.
+
+
+=== 7. Additional items that need to be cached ===
+
+It turns out we have to cache more than just renames; we also cache:
+
+ A) non-renames (i.e. unpaired deletes)
+ B) counts of renames within directories
+ C) sources that were marked as RELEVANT_LOCATION, but which were
+ downgraded to RELEVANT_NO_MORE
+ D) the toplevel trees involved in the merge
+
+These are all stored in struct rename_info, and respectively appear in
+ * cached_pairs (alongside actual renames, just with a value of NULL)
+ * dir_rename_counts
+ * cached_irrelevant
+ * merge_trees
+
+The reason for (A) comes from the irrelevant renames skipping
+optimization discussed in section 6. The fact that irrelevant renames
+are skipped means we only get a subset of the potential renames
+detected and subsequent commits may need to run rename detection on
+the upstream side on a subset of the remaining renames (to get the
+renames that are relevant for that later commit). Since unpaired
+deletes are involved in rename detection too, we don't want to
+repeatedly check that those paths remain unpaired on the upstream side
+with every commit we are transplanting.
+
+The reason for (B) is that diffcore_rename_extended() is what
+generates the counts of renames by directory which is needed in
+directory rename detection, and if we don't run
+diffcore_rename_extended() again then we need to have the output from
+it, including dir_rename_counts, from the previous run.
+
+The reason for (C) is that merge-ort's tree traversal will again think
+those paths are relevant (marking them as RELEVANT_LOCATION), but the
+fact that they were downgraded to RELEVANT_NO_MORE means that
+dir_rename_counts already has the information we need for directory
+rename detection. (A path which becomes RELEVANT_CONTENT in a
+subsequent commit will be removed from cached_irrelevant.)
+
+The reason for (D) is that this is how we determine whether the remember
+renames optimization can be used. In particular, remembering that our
+sequence of merges looks like:
+
+ Merge 1:
+ MERGE_BASE: E
+ MERGE_SIDE1: G
+ MERGE_SIDE2: A
+ => Creates A'
+
+ Merge 2:
+ MERGE_BASE: A
+ MERGE_SIDE1: A'
+ MERGE_SIDE2: B
+ => Creates B'
+
+It is the fact that the trees A and A' appear both in Merge 1 and in
+Merge 2, with A as a parent of A' that allows this optimization. So
+we store the trees to compare with what we are asked to merge next
+time.
+
+
+=== 8. How directory rename detection interacts with the above and ===
+=== why this optimization is still safe even if ===
+=== merge.directoryRenames is set to "true". ===
+
+As noted in the assumptions section:
+
+ """
+ ...if directory renames do occur, then the default of
+ merge.directoryRenames being set to "conflict" means that the operation
+ will stop for users to resolve the conflicts and the cache will be
+ thrown away, and thus that there won't be an optimization to apply.
+ So, the only reason we need to address directory renames specifically,
+ is that some users will have set merge.directoryRenames to "true" to
+ allow the merges to continue to proceed automatically.
+ """
+
+Let's remember that we need to look at how any given pick affects the next
+one. So let's again use the first two picks from the diagram in section
+one:
+
+ First pick does this three-way merge:
+ MERGE_BASE: E
+ MERGE_SIDE1: G
+ MERGE_SIDE2: A
+ => creates A'
+
+ Second pick does this three-way merge:
+ MERGE_BASE: A
+ MERGE_SIDE1: A'
+ MERGE_SIDE2: B
+ => creates B'
+
+Now, directory rename detection exists so that if one side of history
+renames a directory, and the other side adds a new file to the old
+directory, then the merge (with merge.directoryRenames=true) can move the
+file into the new directory. There are two qualitatively different ways to
+add a new file to an old directory: create a new file, or rename a file
+into that directory. Also, directory renames can be done on either side of
+history, so there are four cases to consider:
+
+ * MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to old dir
+ * MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames file into old dir
+ * MERGE_SIDE1 adds new file to old dir, MERGE_SIDE2 renames old dir
+ * MERGE_SIDE1 renames file into old dir, MERGE_SIDE2 renames old dir
+
+One last note before we consider these four cases: There are some
+important properties about how we implement this optimization with
+respect to directory rename detection that we need to bear in mind
+while considering all of these cases:
+
+ * rename caching occurs *after* applying directory renames
+
+ * a rename created by directory rename detection is recorded for the side
+ of history that did the directory rename.
+
+ * dir_rename_counts, the nested map of
+ {oldname => {newname => count}},
+ is cached between runs as well. This basically means that directory
+ rename detection is also cached, though only on the side of history
+ that we cache renames for (MERGE_SIDE1 as far as this document is
+ concerned; see the assumptions section). Two interesting sub-notes
+ about these counts:
+
+ * If we need to perform rename-detection again on the given side (e.g.
+ some paths are relevant for rename detection that weren't before),
+ then we clear dir_rename_counts and recompute it, making use of
+ cached_pairs. The reason it is important to do this is optimizations
+ around RELEVANT_LOCATION exist to prevent us from computing
+ unnecessary renames for directory rename detection and from computing
+ dir_rename_counts for irrelevant directories; but those same renames
+ or directories may become necessary for subsequent merges. The
+ easiest way to "fix up" dir_rename_counts in such cases is to just
+ recompute it.
+
+ * If we prune rename/rename(1to1) entries from the cache, then we also
+ need to update dir_rename_counts to decrement the counts for the
+ involved directory and any relevant parent directories (to undo what
+ update_dir_rename_counts() in diffcore-rename.c incremented when the
+ rename was initially found). If we instead just disable the
+ remembering renames optimization when the exceedingly rare
+ rename/rename(1to1) cases occur, then dir_rename_counts will get
+ re-computed the next time rename detection occurs, as noted above.
+
+ * the side with multiple commits to pick, is the side of history that we
+ do NOT cache renames for. Thus, there are no additional commits to
+ change the number of renames in a directory, except for those done by
+ directory rename detection (which always pad the majority).
+
+ * the "renames" we cache are modified slightly by any directory rename,
+ as noted below.
+
+Now, with those notes out of the way, let's go through the four cases
+in order:
+
+Case 1: MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to old dir
+
+ This case looks like this:
+
+ MERGE_BASE: E, Has olddir/
+ MERGE_SIDE1: G, Renames olddir/ -> newdir/
+ MERGE_SIDE2: A, Adds olddir/newfile
+ => creates A', With newdir/newfile
+
+ MERGE_BASE: A, Has olddir/newfile
+ MERGE_SIDE1: A', Has newdir/newfile
+ MERGE_SIDE2: B, Modifies olddir/newfile
+ => expected B', with threeway-merged newdir/newfile from above
+
+ In this case, with the optimization, note that after the first commit:
+ * MERGE_SIDE1 remembers olddir/ -> newdir/
+ * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile
+ Given the cached rename noted above, the second merge can proceed as
+ expected without needing to perform rename detection from A -> A'.
+
+Case 2: MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames file into old dir
+
+ This case looks like this:
+ MERGE_BASE: E oldfile, olddir/
+ MERGE_SIDE1: G oldfile, olddir/ -> newdir/
+ MERGE_SIDE2: A oldfile -> olddir/newfile
+ => creates A', With newdir/newfile representing original oldfile
+
+ MERGE_BASE: A olddir/newfile
+ MERGE_SIDE1: A' newdir/newfile
+ MERGE_SIDE2: B modify olddir/newfile
+ => expected B', with threeway-merged newdir/newfile from above
+
+ In this case, with the optimization, note that after the first commit:
+ * MERGE_SIDE1 remembers olddir/ -> newdir/
+ * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile
+ (NOT oldfile -> newdir/newfile; compare to case with
+ (p->status == 'R' && new_path) in possibly_cache_new_pair())
+
+ Given the cached rename noted above, the second merge can proceed as
+ expected without needing to perform rename detection from A -> A'.
+
+Case 3: MERGE_SIDE1 adds new file to old dir, MERGE_SIDE2 renames old dir
+
+ This case looks like this:
+
+ MERGE_BASE: E, Has olddir/
+ MERGE_SIDE1: G, Adds olddir/newfile
+ MERGE_SIDE2: A, Renames olddir/ -> newdir/
+ => creates A', With newdir/newfile
+
+ MERGE_BASE: A, Has newdir/, but no notion of newdir/newfile
+ MERGE_SIDE1: A', Has newdir/newfile
+ MERGE_SIDE2: B, Has newdir/, but no notion of newdir/newfile
+ => expected B', with newdir/newfile from A'
+
+ In this case, with the optimization, note that after the first commit there
+ were no renames on MERGE_SIDE1, and any renames on MERGE_SIDE2 are tossed.
+ But the second merge didn't need any renames so this is fine.
+
+Case 4: MERGE_SIDE1 renames file into old dir, MERGE_SIDE2 renames old dir
+
+ This case looks like this:
+
+ MERGE_BASE: E, Has olddir/
+ MERGE_SIDE1: G, Renames oldfile -> olddir/newfile
+ MERGE_SIDE2: A, Renames olddir/ -> newdir/
+ => creates A', With newdir/newfile representing original oldfile
+
+ MERGE_BASE: A, Has oldfile
+ MERGE_SIDE1: A', Has newdir/newfile
+ MERGE_SIDE2: B, Modifies oldfile
+ => expected B', with threeway-merged newdir/newfile from above
+
+ In this case, with the optimization, note that after the first commit:
+ * MERGE_SIDE1 remembers oldfile -> newdir/newfile
+ (NOT oldfile -> olddir/newfile; compare to case of second
+ block under p->status == 'R' in possibly_cache_new_pair())
+ * MERGE_SIDE2 renames are tossed because only MERGE_SIDE1 is remembered
+
+ Given the cached rename noted above, the second merge can proceed as
+ expected without needing to perform rename detection from A -> A'.
+
+Finally, I'll just note here that interactions with the
+skip-irrelevant-renames optimization means we sometimes don't detect
+renames for any files within a directory that was renamed, in which
+case we will not have been able to detect any rename for the directory
+itself. In such a case, we do not know whether the directory was
+renamed; we want to be careful to avoid caching some kind of "this
+directory was not renamed" statement. If we did, then a subsequent
+commit being rebased could add a file to the old directory, and the
+user would expect it to end up in the correct directory -- something
+our erroneous "this directory was not renamed" cache would preclude.
diff --git a/Documentation/technical/sparse-index.txt b/Documentation/technical/sparse-index.txt
new file mode 100644
index 0000000000..3b24c1a219
--- /dev/null
+++ b/Documentation/technical/sparse-index.txt
@@ -0,0 +1,208 @@
+Git Sparse-Index Design Document
+================================
+
+The sparse-checkout feature allows users to focus a working directory on
+a subset of the files at HEAD. The cone mode patterns, enabled by
+`core.sparseCheckoutCone`, allow for very fast pattern matching to
+discover which files at HEAD belong in the sparse-checkout cone.
+
+Three important scale dimensions for a Git working directory are:
+
+* `HEAD`: How many files are present at `HEAD`?
+
+* Populated: How many files are within the sparse-checkout cone?
+
+* Modified: How many files has the user modified in the working directory?
+
+We will use big-O notation -- O(X) -- to denote how expensive certain
+operations are in terms of these dimensions.
+
+These dimensions are ordered by their magnitude: users (typically) modify
+fewer files than are populated, and we can only populate files at `HEAD`.
+
+Problems occur if there is an extreme imbalance in these dimensions. For
+example, if `HEAD` contains millions of paths but the populated set has
+only tens of thousands, then commands like `git status` and `git add` can
+be dominated by operations that require O(`HEAD`) operations instead of
+O(Populated). Primarily, the cost is in parsing and rewriting the index,
+which is filled primarily with files at `HEAD` that are marked with the
+`SKIP_WORKTREE` bit.
+
+The sparse-index intends to take these commands that read and modify the
+index from O(`HEAD`) to O(Populated). To do this, we need to modify the
+index format in a significant way: add "sparse directory" entries.
+
+With cone mode patterns, it is possible to detect when an entire
+directory will have its contents outside of the sparse-checkout definition.
+Instead of listing all of the files it contains as individual entries, a
+sparse-index contains an entry with the directory name, referencing the
+object ID of the tree at `HEAD` and marked with the `SKIP_WORKTREE` bit.
+If we need to discover the details for paths within that directory, we
+can parse trees to find that list.
+
+At time of writing, sparse-directory entries violate expectations about the
+index format and its in-memory data structure. There are many consumers in
+the codebase that expect to iterate through all of the index entries and
+see only files. In fact, these loops expect to see a reference to every
+staged file. One way to handle this is to parse trees to replace a
+sparse-directory entry with all of the files within that tree as the index
+is loaded. However, parsing trees is slower than parsing the index format,
+so that is a slower operation than if we left the index alone. The plan is
+to make all of these integrations "sparse aware" so this expansion through
+tree parsing is unnecessary and they use fewer resources than when using a
+full index.
+
+The implementation plan below follows four phases to slowly integrate with
+the sparse-index. The intention is to incrementally update Git commands to
+interact safely with the sparse-index without significant slowdowns. This
+may not always be possible, but the hope is that the primary commands that
+users need in their daily work are dramatically improved.
+
+Phase I: Format and initial speedups
+------------------------------------
+
+During this phase, Git learns to enable the sparse-index and safely parse
+one. Protections are put in place so that every consumer of the in-memory
+data structure can operate with its current assumption of every file at
+`HEAD`.
+
+At first, every index parse will call a helper method,
+`ensure_full_index()`, which scans the index for sparse-directory entries
+(pointing to trees) and replaces them with the full list of paths (with
+blob contents) by parsing tree objects. This will be slower in all cases.
+The only noticeable change in behavior will be that the serialized index
+file contains sparse-directory entries.
+
+To start, we use a new required index extension, `sdir`, to allow
+inserting sparse-directory entries into indexes with file format
+versions 2, 3, and 4. This prevents Git versions that do not understand
+the sparse-index from operating on one, while allowing tools that do not
+understand the sparse-index to operate on repositories as long as they do
+not interact with the index. A new format, index v5, will be introduced
+that includes sparse-directory entries by default. It might also
+introduce other features that have been considered for improving the
+index.
+
+Next, consumers of the index will be guarded against operating on a
+sparse-index by inserting calls to `ensure_full_index()` or
+`expand_index_to_path()`. If a specific path is requested, then those will
+be protected from within the `index_file_exists()` and `index_name_pos()`
+API calls: they will call `ensure_full_index()` if necessary. The
+intention here is to preserve existing behavior when interacting with a
+sparse-checkout. We don't want a change to happen by accident, without
+tests. Many of these locations may not need any change before removing the
+guards, but we should not do so without tests to ensure the expected
+behavior happens.
+
+It may be desirable to _change_ the behavior of some commands in the
+presence of a sparse index or more generally in any sparse-checkout
+scenario. In such cases, these should be carefully communicated and
+tested. No such behavior changes are intended during this phase.
+
+During a scan of the codebase, not every iteration of the cache entries
+needs an `ensure_full_index()` check. The basic reasons include:
+
+1. The loop is scanning for entries with non-zero stage. These entries
+ are not collapsed into a sparse-directory entry.
+
+2. The loop is scanning for submodules. These entries are not collapsed
+ into a sparse-directory entry.
+
+3. The loop is part of the index API, especially around reading or
+ writing the format.
+
+4. The loop is checking for correct order of cache entries and that is
+ correct if and only if the sparse-directory entries are in the correct
+ location.
+
+5. The loop ignores entries with the `SKIP_WORKTREE` bit set, or is
+ otherwise already aware of sparse directory entries.
+
+6. The sparse-index is disabled at this point when using the split-index
+ feature, so no effort is made to protect the split-index API.
+
+Even after inserting these guards, we will keep expanding sparse-indexes
+for most Git commands using the `command_requires_full_index` repository
+setting. This setting will be on by default and disabled one builtin at a
+time until we have sufficient confidence that all of the index operations
+are properly guarded.
+
+To complete this phase, the commands `git status` and `git add` will be
+integrated with the sparse-index so that they operate with O(Populated)
+performance. They will be carefully tested for operations within and
+outside the sparse-checkout definition.
+
+Phase II: Careful integrations
+------------------------------
+
+This phase focuses on ensuring that all index extensions and APIs work
+well with a sparse-index. This requires significant increases to our test
+coverage, especially for operations that interact with the working
+directory outside of the sparse-checkout definition. Some of these
+behaviors may not be the desirable ones, such as some tests already
+marked for failure in `t1092-sparse-checkout-compatibility.sh`.
+
+The index extensions that may require special integrations are:
+
+* FS Monitor
+* Untracked cache
+
+While integrating with these features, we should look for patterns that
+might lead to better APIs for interacting with the index. Coalescing
+common usage patterns into an API call can reduce the number of places
+where sparse-directories need to be handled carefully.
+
+Phase III: Important command speedups
+-------------------------------------
+
+At this point, the patterns for testing and implementing sparse-directory
+logic should be relatively stable. This phase focuses on updating some of
+the most common builtins that use the index to operate as O(Populated).
+Here is a potential list of commands that could be valuable to integrate
+at this point:
+
+* `git commit`
+* `git checkout`
+* `git merge`
+* `git rebase`
+
+Hopefully, commands such as `git merge` and `git rebase` can benefit
+instead from merge algorithms that do not use the index as a data
+structure, such as the merge-ORT strategy. As these topics mature, we
+may enable the ORT strategy by default for repositories using the
+sparse-index feature.
+
+Along with `git status` and `git add`, these commands cover the majority
+of users' interactions with the working directory. In addition, we can
+integrate with these commands:
+
+* `git grep`
+* `git rm`
+
+These have been proposed as some whose behavior could change when in a
+repo with a sparse-checkout definition. It would be good to include this
+behavior automatically when using a sparse-index. Some clarity is needed
+to make the behavior switch clear to the user.
+
+This phase is the first where parallel work might be possible without too
+many conflicts between topics.
+
+Phase IV: The long tail
+-----------------------
+
+This last phase is less a "phase" and more "the new normal" after all of
+the previous work.
+
+To start, the `command_requires_full_index` option could be removed in
+favor of expanding only when hitting an API guard.
+
+There are many Git commands that could use special attention to operate as
+O(Populated), while some might be so rare that it is acceptable to leave
+them with additional overhead when a sparse-index is present.
+
+Here are some commands that might be useful to update:
+
+* `git sparse-checkout set`
+* `git am`
+* `git clean`
+* `git stash`
diff --git a/Documentation/user-manual.txt b/Documentation/user-manual.txt
index fd480b8645..f9e54b8674 100644
--- a/Documentation/user-manual.txt
+++ b/Documentation/user-manual.txt
@@ -1,5 +1,8 @@
= Git User Manual
+[preface]
+== Introduction
+
Git is a fast distributed revision control system.
This manual is designed to be readable by someone with basic UNIX