diff options
Diffstat (limited to 'Documentation')
69 files changed, 2177 insertions, 293 deletions
diff --git a/Documentation/CodingGuidelines b/Documentation/CodingGuidelines index 45465bc0c9..e3af089ecf 100644 --- a/Documentation/CodingGuidelines +++ b/Documentation/CodingGuidelines @@ -175,6 +175,11 @@ For shell scripts specifically (not exhaustive): does not have such a problem. + - Even though "local" is not part of POSIX, we make heavy use of it + in our test suite. We do not use it in scripted Porcelains, and + hopefully nobody starts using "local" before they are reimplemented + in C ;-) + For C programs: @@ -498,7 +503,12 @@ Error Messages - Do not end error messages with a full stop. - - Do not capitalize ("unable to open %s", not "Unable to open %s") + - Do not capitalize the first word, only because it is the first word + in the message ("unable to open %s", not "Unable to open %s"). But + "SHA-3 not supported" is fine, because the reason the first word is + capitalized is not because it is at the beginning of the sentence, + but because the word would be spelled in capital letters even when + it appeared in the middle of the sentence. - Say what the error is first ("cannot open %s", not "%s: cannot open") diff --git a/Documentation/Makefile b/Documentation/Makefile index 874a01d7a8..f5605b7767 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -2,6 +2,8 @@ MAN1_TXT = MAN5_TXT = MAN7_TXT = +HOWTO_TXT = +DOC_DEP_TXT = TECH_DOCS = ARTICLES = SP_ARTICLES = @@ -42,6 +44,11 @@ MAN7_TXT += gittutorial-2.txt MAN7_TXT += gittutorial.txt MAN7_TXT += gitworkflows.txt +HOWTO_TXT += $(wildcard howto/*.txt) + +DOC_DEP_TXT += $(wildcard *.txt) +DOC_DEP_TXT += $(wildcard config/*.txt) + ifdef MAN_FILTER MAN_TXT = $(filter $(MAN_FILTER),$(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)) else @@ -91,6 +98,7 @@ TECH_DOCS += technical/multi-pack-index TECH_DOCS += technical/pack-format TECH_DOCS += technical/pack-heuristics TECH_DOCS += technical/pack-protocol +TECH_DOCS += technical/parallel-checkout TECH_DOCS += technical/partial-clone TECH_DOCS += technical/protocol-capabilities TECH_DOCS += technical/protocol-common @@ -131,6 +139,7 @@ ASCIIDOC_CONF = -f asciidoc.conf ASCIIDOC_COMMON = $(ASCIIDOC) $(ASCIIDOC_EXTRA) $(ASCIIDOC_CONF) \ -amanversion=$(GIT_VERSION) \ -amanmanual='Git Manual' -amansource='Git' +ASCIIDOC_DEPS = asciidoc.conf GIT-ASCIIDOCFLAGS TXT_TO_HTML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_HTML) TXT_TO_XML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_DOCBOOK) MANPAGE_XSL = manpage-normal.xsl @@ -185,6 +194,7 @@ ASCIIDOC_DOCBOOK = docbook5 ASCIIDOC_EXTRA += -acompat-mode -atabsize=8 ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions ASCIIDOC_EXTRA += -alitdd='&\#x2d;&\#x2d;' +ASCIIDOC_DEPS = asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS DBLATEX_COMMON = XMLTO_EXTRA += --skip-validation XMLTO_EXTRA += -x manpage.xsl @@ -285,10 +295,8 @@ docdep_prereqs = \ mergetools-list.made $(mergetools_txt) \ cmd-list.made $(cmds_txt) -doc.dep : $(docdep_prereqs) $(wildcard *.txt) $(wildcard config/*.txt) build-docdep.perl - $(QUIET_GEN)$(RM) $@+ $@ && \ - $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ - mv $@+ $@ +doc.dep : $(docdep_prereqs) $(DOC_DEP_TXT) build-docdep.perl + $(QUIET_GEN)$(PERL_PATH) ./build-docdep.perl >$@ $(QUIET_STDERR) ifneq ($(MAKECMDGOALS),clean) -include doc.dep @@ -308,8 +316,7 @@ cmds_txt = cmds-ancillaryinterrogators.txt \ $(cmds_txt): cmd-list.made cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) - $(QUIET_GEN)$(RM) $@ && \ - $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(cmds_txt) $(QUIET_STDERR) && \ + $(QUIET_GEN)$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(cmds_txt) $(QUIET_STDERR) && \ date >$@ mergetools_txt = mergetools-diff.txt mergetools-merge.txt @@ -317,7 +324,7 @@ mergetools_txt = mergetools-diff.txt mergetools-merge.txt $(mergetools_txt): mergetools-list.made mergetools-list.made: ../git-mergetool--lib.sh $(wildcard ../mergetools/*) - $(QUIET_GEN)$(RM) $@ && \ + $(QUIET_GEN) \ $(SHELL_PATH) -c 'MERGE_TOOLS_DIR=../mergetools && \ . ../git-mergetool--lib.sh && \ show_tool_names can_diff "* " || :' >mergetools-diff.txt && \ @@ -346,32 +353,23 @@ clean: $(RM) manpage-base-url.xsl $(RM) GIT-ASCIIDOCFLAGS -$(MAN_HTML): %.html : %.txt asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_HTML) -d manpage -o $@+ $< && \ - mv $@+ $@ +$(MAN_HTML): %.html : %.txt $(ASCIIDOC_DEPS) + $(QUIET_ASCIIDOC)$(TXT_TO_HTML) -d manpage -o $@ $< -$(OBSOLETE_HTML): %.html : %.txto asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_HTML) -o $@+ $< && \ - mv $@+ $@ +$(OBSOLETE_HTML): %.html : %.txto $(ASCIIDOC_DEPS) + $(QUIET_ASCIIDOC)$(TXT_TO_HTML) -o $@ $< manpage-base-url.xsl: manpage-base-url.xsl.in $(QUIET_GEN)sed "s|@@MAN_BASE_URL@@|$(MAN_BASE_URL)|" $< > $@ %.1 %.5 %.7 : %.xml manpage-base-url.xsl $(wildcard manpage*.xsl) - $(QUIET_XMLTO)$(RM) $@ && \ - $(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + $(QUIET_XMLTO)$(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< -%.xml : %.txt asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_XML) -d manpage -o $@+ $< && \ - mv $@+ $@ +%.xml : %.txt $(ASCIIDOC_DEPS) + $(QUIET_ASCIIDOC)$(TXT_TO_XML) -d manpage -o $@ $< user-manual.xml: user-manual.txt user-manual.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_XML) -d book -o $@+ $< && \ - mv $@+ $@ + $(QUIET_ASCIIDOC)$(TXT_TO_XML) -d book -o $@ $< technical/api-index.txt: technical/api-index-skel.txt \ technical/api-index.sh $(patsubst %,%.txt,$(API_DOCS)) @@ -392,46 +390,35 @@ XSLTOPTS += --stringparam html.stylesheet docbook-xsl.css XSLTOPTS += --param generate.consistent.ids 1 user-manual.html: user-manual.xml $(XSLT) - $(QUIET_XSLTPROC)$(RM) $@+ $@ && \ - xsltproc $(XSLTOPTS) -o $@+ $(XSLT) $< && \ - mv $@+ $@ + $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< git.info: user-manual.texi $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi user-manual.texi: user-manual.xml - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ - $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ - rm $@++ && \ - mv $@+ $@ + $(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@+ && \ + $(PERL_PATH) fix-texi.perl <$@+ >$@ && \ + $(RM) $@+ user-manual.pdf: user-manual.xml - $(QUIET_DBLATEX)$(RM) $@+ $@ && \ - $(DBLATEX) -o $@+ $(DBLATEX_COMMON) $< && \ - mv $@+ $@ + $(QUIET_DBLATEX)$(DBLATEX) -o $@ $(DBLATEX_COMMON) $< gitman.texi: $(MAN_XML) cat-texi.perl texi.xsl - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ + $(QUIET_DB2TEXI) \ ($(foreach xml,$(sort $(MAN_XML)),xsltproc -o $(xml)+ texi.xsl $(xml) && \ $(DOCBOOK2X_TEXI) --encoding=UTF-8 --to-stdout $(xml)+ && \ - rm $(xml)+ &&) true) > $@++ && \ - $(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \ - rm $@++ && \ - mv $@+ $@ + $(RM) $(xml)+ &&) true) > $@+ && \ + $(PERL_PATH) cat-texi.perl $@ <$@+ >$@ && \ + $(RM) $@+ gitman.info: gitman.texi $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - $(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \ - mv $@+ $@ + $(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@ -howto-index.txt: howto-index.sh $(wildcard howto/*.txt) - $(QUIET_GEN)$(RM) $@+ $@ && \ - '$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(wildcard howto/*.txt)) >$@+ && \ - mv $@+ $@ +howto-index.txt: howto-index.sh $(HOWTO_TXT) + $(QUIET_GEN)'$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(HOWTO_TXT)) >$@ $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt $(QUIET_ASCIIDOC)$(TXT_TO_HTML) $*.txt @@ -439,11 +426,10 @@ $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt WEBDOC_DEST = /pub/software/scm/git/docs howto/%.html: ASCIIDOC_EXTRA += -a git-relative-html-prefix=../ -$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ +$(patsubst %.txt,%.html,$(HOWTO_TXT)): %.html : %.txt GIT-ASCIIDOCFLAGS + $(QUIET_ASCIIDOC) \ sed -e '1,/^$$/d' $< | \ - $(TXT_TO_HTML) - >$@+ && \ - mv $@+ $@ + $(TXT_TO_HTML) - >$@ install-webdoc : html '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) @@ -471,11 +457,20 @@ print-man1: @for i in $(MAN1_TXT); do echo $$i; done lint-docs:: - $(QUIET_LINT)$(PERL_PATH) lint-gitlink.perl + $(QUIET_LINT)$(PERL_PATH) lint-gitlink.perl \ + $(HOWTO_TXT) $(DOC_DEP_TXT) \ + --section=1 $(MAN1_TXT) \ + --section=5 $(MAN5_TXT) \ + --section=7 $(MAN7_TXT); \ + $(PERL_PATH) lint-man-end-blurb.perl $(MAN_TXT); \ + $(PERL_PATH) lint-man-section-order.perl $(MAN_TXT); ifeq ($(wildcard po/Makefile),po/Makefile) doc-l10n install-l10n:: $(MAKE) -C po $@ endif +# Delete the target file on error +.DELETE_ON_ERROR: + .PHONY: FORCE diff --git a/Documentation/MyFirstContribution.txt b/Documentation/MyFirstContribution.txt index af0a9da62e..015cf24631 100644 --- a/Documentation/MyFirstContribution.txt +++ b/Documentation/MyFirstContribution.txt @@ -47,7 +47,7 @@ Veteran contributors who are especially interested in helping mentor newcomers are present on the list. In order to avoid search indexers, group membership is required to view messages; anyone can join and no approval is required. -==== https://webchat.freenode.net/#git-devel[#git-devel] on Freenode +==== https://web.libera.chat/#git-devel[#git-devel] on Libera Chat This IRC channel is for conversations between Git contributors. If someone is currently online and knows the answer to your question, you can receive help @@ -827,7 +827,7 @@ either examining recent pull requests where someone has been granted `/allow` (https://github.com/gitgitgadget/git/pulls?utf8=%E2%9C%93&q=is%3Apr+is%3Aopen+%22%2Fallow%22[Search: is:pr is:open "/allow"]), in which case both the author and the person who granted the `/allow` can now `/allow` you, or by inquiring on the -https://webchat.freenode.net/#git-devel[#git-devel] IRC channel on Freenode +https://web.libera.chat/#git-devel[#git-devel] IRC channel on Libera Chat linking your pull request and asking for someone to `/allow` you. If the CI fails, you can update your changes with `git rebase -i` and push your diff --git a/Documentation/RelNotes/1.6.0.3.txt b/Documentation/RelNotes/1.6.0.3.txt index ae0577836a..ad36c0f0b7 100644 --- a/Documentation/RelNotes/1.6.0.3.txt +++ b/Documentation/RelNotes/1.6.0.3.txt @@ -50,7 +50,7 @@ Fixes since v1.6.0.2 if the working tree is currently dirty. * "git for-each-ref --format=%(subject)" fixed for commits with no - no newline in the message body. + newline in the message body. * "git remote" fixed to protect printf from user input. diff --git a/Documentation/RelNotes/1.8.4.txt b/Documentation/RelNotes/1.8.4.txt index 255e185af6..2e7529928b 100644 --- a/Documentation/RelNotes/1.8.4.txt +++ b/Documentation/RelNotes/1.8.4.txt @@ -365,7 +365,7 @@ details). (merge 2fbd4f9 mh/maint-lockfile-overflow later to maint). * Invocations of "git checkout" used internally by "git rebase" were - counted as "checkout", and affected later "git checkout -" to the + counted as "checkout", and affected later "git checkout -", which took the user to an unexpected place. (merge 3bed291 rr/rebase-checkout-reflog later to maint). diff --git a/Documentation/RelNotes/2.29.0.txt b/Documentation/RelNotes/2.29.0.txt index 06ba2f803f..1f41302ebb 100644 --- a/Documentation/RelNotes/2.29.0.txt +++ b/Documentation/RelNotes/2.29.0.txt @@ -184,8 +184,8 @@ Performance, Internal Implementation, Development Support etc. the ref backend in use, as its format is much richer than the normal refs, and written directly by "git fetch" as a plain file.. - * An unused binary has been discarded, and and a bunch of commands - have been turned into into built-in. + * An unused binary has been discarded, and a bunch of commands + have been turned into built-in. * A handful of places in in-tree code still relied on being able to execute the git subcommands, especially built-ins, in "git-foo" diff --git a/Documentation/RelNotes/2.32.0.txt b/Documentation/RelNotes/2.32.0.txt index ea220f2a51..87d56fa1aa 100644 --- a/Documentation/RelNotes/2.32.0.txt +++ b/Documentation/RelNotes/2.32.0.txt @@ -7,6 +7,13 @@ Backward compatibility notes * ".gitattributes", ".gitignore", and ".mailmap" files that are symbolic links are ignored. + * "git apply --3way" used to first attempt a straight application, + and only fell back to the 3-way merge algorithm when the stright + application failed. Starting with this version, the command will + first try the 3-way merge algorithm and only when it fails (either + resulting with conflict or the base versions of blobs are missing), + falls back to the usual patch application. + Updates since v2.31 ------------------- @@ -40,11 +47,6 @@ UI, Workflows & Features tweak both the message and the contents, and only the message, respectively. - * When accessing a server with a URL like https://user:pass@site/, we - did not to fall back to the basic authentication with the - credential material embedded in the URL after the "Negotiate" - authentication failed. Now we do. - * "git send-email" learned to honor the core.hooksPath configuration. * "git format-patch -v<n>" learned to allow a reroll count that is @@ -75,6 +77,57 @@ UI, Workflows & Features * The command line completion (in contrib/) has learned that CHERRY_PICK_HEAD is a possible pseudo-ref. + * Userdiff patterns for "Scheme" has been added. + + * "git log" learned "--diff-merges=<style>" option, with an + associated configuration variable log.diffMerges. + + * "git log --format=..." placeholders learned %ah/%ch placeholders to + request the --date=human output. + + * Replace GIT_CONFIG_NOSYSTEM mechanism to decline from reading the + system-wide configuration file with GIT_CONFIG_SYSTEM that lets + users specify from which file to read the system-wide configuration + (setting it to an empty file would essentially be the same as + setting NOSYSTEM), and introduce GIT_CONFIG_GLOBAL to override the + per-user configuration in $HOME/.gitconfig. + + * "git add" and "git rm" learned not to touch those paths that are + outside of sparse checkout. + + * "git rev-list" learns the "--filter=object:type=<type>" option, + which can be used to exclude objects of the given kind from the + packfile generated by pack-objects. + + * The command line completion (in contrib/) for "git stash" has been + updated. + + * "git subtree" updates. + + * It is now documented that "format-patch" skips merges. + + * Options to "git pack-objects" that take numeric values like + --window and --depth should not accept negative values; the input + validation has been tightened. + + * The way the command line specified by the trailer.<token>.command + configuration variable receives the end-user supplied value was + both error prone and misleading. An alternative to achieve the + same goal in a safer and more intuitive way has been added, as + the trailer.<token>.cmd configuration variable, to replace it. + + * "git add -i --dry-run" does not dry-run, which was surprising. The + combination of options has taught to error out. + + * "git push" learns to discover common ancestor with the receiving + end over protocol v2. This will hopefully make "git push" as + efficient as "git fetch" in avoiding objects from getting + transferred unnecessarily. + + * "git mailinfo" (hence "git am") learned the "--quoted-cr" option to + control how lines ending with CRLF wrapped in base64 or qp are + handled. + Performance, Internal Implementation, Development Support etc. @@ -120,6 +173,38 @@ Performance, Internal Implementation, Development Support etc. * The last remnant of gettext-poison has been removed. + * The test framework has been taught to optionally turn the default + merge strategy to "ort" throughout the system where we use + three-way merges internally, like cherry-pick, rebase etc., + primarily to enhance its test coverage (the strategy has been + available as an explicit "-s ort" choice). + + * A bit of code clean-up and a lot of test clean-up around userdiff + area. + + * Handling of "promisor packs" that allows certain objects to be + missing and lazily retrievable has been optimized (a bit). + + * When packet_write() fails, we gave an extra error message + unnecessarily, which has been corrected. + + * The checkout machinery has been taught to perform the actual + write-out of the files in parallel when able. + + * Show errno in the trace output in the error codepath that calls + read_raw_ref method. + + * Effort to make the command line completion (in contrib/) safe with + "set -u" continues. + + * Tweak a few tests for "log --format=..." that show timestamps in + various formats. + + * The reflog expiry machinery has been taught to emit trace events. + + * Over-the-wire protocol learns a new request type to ask for object + sizes given a list of object names. + Fixes since v2.31 ----------------- @@ -209,6 +294,102 @@ Fixes since v2.31 option. (merge c5c0548d79 vs/completion-with-set-u later to maint). + * When "git pack-objects" makes a literal copy of a part of existing + packfile using the reachability bitmaps, its update to the progress + meter was broken. + (merge 8e118e8490 jk/pack-objects-bitmap-progress-fix later to maint). + + * The dependencies for config-list.h and command-list.h were broken + when the former was split out of the latter, which has been + corrected. + (merge 56550ea718 sg/bugreport-fixes later to maint). + + * "git push --quiet --set-upstream" was not quiet when setting the + upstream branch configuration, which has been corrected. + (merge f3cce896a8 ow/push-quiet-set-upstream later to maint). + + * The prefetch task in "git maintenance" assumed that "git fetch" + from any remote would fetch all its local branches, which would + fetch too much if the user is interested in only a subset of + branches there. + (merge 32f67888d8 ds/maintenance-prefetch-fix later to maint). + + * Clarify that pathnames recorded in Git trees are most often (but + not necessarily) encoded in UTF-8. + (merge 9364bf465d ab/pathname-encoding-doc later to maint). + + * "git --config-env var=val cmd" weren't accepted (only + --config-env=var=val was). + (merge c331551ccf ps/config-env-option-with-separate-value later to maint). + + * When the reachability bitmap is in effect, the "do not lose + recently created objects and those that are reachable from them" + safety to protect us from races were disabled by mistake, which has + been corrected. + (merge 2ba582ba4c jk/prune-with-bitmap-fix later to maint). + + * Cygwin pathname handling fix. + (merge bccc37fdc7 ad/cygwin-no-backslashes-in-paths later to maint). + + * "git rebase --[no-]reschedule-failed-exec" did not work well with + its configuration variable, which has been corrected. + (merge e5b32bffd1 ab/rebase-no-reschedule-failed-exec later to maint). + + * Portability fix for command line completion script (in contrib/). + (merge f2acf763e2 si/zsh-complete-comment-fix later to maint). + + * "git repack -A -d" in a partial clone unnecessarily loosened + objects in promisor pack. + + * "git bisect skip" when custom words are used for new/old did not + work, which has been corrected. + + * A few variants of informational message "Already up-to-date" has + been rephrased. + (merge ad9322da03 js/merge-already-up-to-date-message-reword later to maint). + + * "git submodule update --quiet" did not propagate the quiet option + down to underlying "git fetch", which has been corrected. + (merge 62af4bdd42 nc/submodule-update-quiet later to maint). + + * Document that our test can use "local" keyword. + (merge a84fd3bcc6 jc/test-allows-local later to maint). + + * The word-diff mode has been taught to work better with a word + regexp that can match an empty string. + (merge 0324e8fc6b pw/word-diff-zero-width-matches later to maint). + + * "git p4" learned to find branch points more efficiently. + (merge 6b79818bfb jk/p4-locate-branch-point-optim later to maint). + + * When "git update-ref -d" removes a ref that is packed, it left + empty directories under $GIT_DIR/refs/ for + (merge 5f03e5126d wc/packed-ref-removal-cleanup later to maint). + + * "git clean" and "git ls-files -i" had confusion around working on + or showing ignored paths inside an ignored directory, which has + been corrected. + (merge b548f0f156 en/dir-traversal later to maint). + + * The handling of "%(push)" formatting element of "for-each-ref" and + friends was broken when the same codepath started handling + "%(push:<what>)", which has been corrected. + (merge 1e1c4c5eac zh/ref-filter-push-remote-fix later to maint). + + * The bash prompt script (in contrib/) did not work under "set -u". + (merge 5c0cbdb107 en/prompt-under-set-u later to maint). + + * The "chainlint" feature in the test framework is a handy way to + catch common mistakes in writing new tests, but tends to get + expensive. An knob to selectively disable it has been introduced + to help running tests that the developer has not modified. + (merge 2d86a96220 jk/test-chainlint-softer later to maint). + + * The "rev-parse" command did not diagnose the lack of argument to + "--path-format" option, which was introduced in v2.31 era, which + has been corrected. + (merge 99fc555188 wm/rev-parse-path-format-wo-arg later to maint). + * Other code cleanup, docfix, build fix, etc. (merge f451960708 dl/cat-file-doc-cleanup later to maint). (merge 12604a8d0c sv/t9801-test-path-is-file-cleanup later to maint). @@ -223,3 +404,13 @@ Fixes since v2.31 (merge 28e29ee38b jc/doc-format-patch-clarify later to maint). (merge fc12b6fdde fm/user-manual-use-preface later to maint). (merge dba94e3a85 cc/test-helper-bloom-usage-fix later to maint). + (merge 61a7660516 hn/reftable-tables-doc-update later to maint). + (merge 81ed96a9b2 jt/fetch-pack-request-fix later to maint). + (merge 151b6c2dd7 jc/doc-do-not-capitalize-clarification later to maint). + (merge 9160068ac6 js/access-nul-emulation-on-windows later to maint). + (merge 7a14acdbe6 po/diff-patch-doc later to maint). + (merge f91371b948 pw/patience-diff-clean-up later to maint). + (merge 3a7f0908b6 mt/clean-clean later to maint). + (merge d4e2d15a8b ab/streaming-simplify later to maint). + (merge 0e59f7ad67 ah/merge-ort-i18n later to maint). + (merge e6f68f62e0 ls/typofix later to maint). diff --git a/Documentation/RelNotes/2.33.0.txt b/Documentation/RelNotes/2.33.0.txt new file mode 100644 index 0000000000..57443c7466 --- /dev/null +++ b/Documentation/RelNotes/2.33.0.txt @@ -0,0 +1,78 @@ +Git 2.33 Release Notes +====================== + +Backward compatibility notes +---------------------------- + + * The "-m" option in "git log -m" that does not specify which format, + if any, of diff is desired did not have any visible effect; it now + implies some form of diff (by default "--patch") is produced. + + You can disable the diff output with "git log -m --no-patch", but + then there probably isn't much point in passing "-m" in the first + place ;-). + + +Updates since Git 2.32 +---------------------- + +UI, Workflows & Features + + * "git send-email" learned the "--sendmail-cmd" command line option + and the "sendemail.sendmailCmd" configuration variable, which is a + more sensible approach than the current way of repurposing the + "smtp-server" that is meant to name the server to instead name the + command to talk to the server. + + * The "-m" option in "git log -m" that does not specify which format, + if any, of diff is desired did not have any visible effect; it now + implies some form of diff (by default "--patch") is produced. + + +Performance, Internal Implementation, Development Support etc. + + * The code to handle the "--format" option in "for-each-ref" and + friends made too many string comparisons on %(atom)s used in the + format string, which has been corrected by converting them into + enum when the format string is parsed. + + * Use the hashfile API in the codepath that writes the index file to + reduce code duplication. + + * Repeated rename detections in a sequence of mergy operations have + been optimize out. + + +Fixes since v2.32 +----------------- + + * We historically rejected a very short string as an author name + while accepting a patch e-mail, which has been loosened. + (merge 72ee47ceeb ef/mailinfo-short-name later to maint). + + * The parallel checkout codepath did not initialize object ID field + used to talk to the worker processes in a futureproof way. + + * Rewrite code that triggers undefined behaviour warning. + (merge aafa5df0df jn/size-t-casted-to-off-t-fix later to maint). + + * The description of "fast-forward" in the glossary has been updated. + (merge e22f2daed0 ry/clarify-fast-forward-in-glossary later to maint). + + * Recent "git clone" left a temporary directory behind when the + transport layer returned an failure. + (merge 6aacb7d861 jk/clone-clean-upon-transport-error later to maint). + + * "git fetch" over protocol v2 left its side of the socket open after + it finished speaking, which unnecessarily wasted the resource on + the other side. + (merge ae1a7eefff jk/fetch-pack-v2-half-close-early later to maint). + + * Other code cleanup, docfix, build fix, etc. + (merge bfe35a6165 ah/doc-describe later to maint). + (merge f302c1e4aa jc/clarify-revision-range later to maint). + (merge 3127ff90ea tl/fix-packfile-uri-doc later to maint). + (merge a84216c684 jk/doc-color-pager later to maint). + (merge 4e0a64a713 ab/trace2-squelch-gcc-warning later to maint). + (merge 225f7fa847 ps/rev-list-object-type-filter later to maint). + (merge 5317dfeaed dd/honor-users-tar-in-tests later to maint). diff --git a/Documentation/RelNotes/2.8.0.txt b/Documentation/RelNotes/2.8.0.txt index 27320b6a9f..38453281b8 100644 --- a/Documentation/RelNotes/2.8.0.txt +++ b/Documentation/RelNotes/2.8.0.txt @@ -377,7 +377,7 @@ notes for details). on that order. * "git show 'HEAD:Foo[BAR]Baz'" did not interpret the argument as a - rev, i.e. the object named by the the pathname with wildcard + rev, i.e. the object named by the pathname with wildcard characters in a tree object. (merge aac4fac nd/dwim-wildcards-as-pathspecs later to maint). diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 0452db2e67..55287d72e0 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -117,10 +117,13 @@ If in doubt which identifier to use, run `git log --no-merges` on the files you are modifying to see the current conventions. [[summary-section]] -It's customary to start the remainder of the first line after "area: " -with a lower-case letter. E.g. "doc: clarify...", not "doc: -Clarify...", or "githooks.txt: improve...", not "githooks.txt: -Improve...". +The title sentence after the "area:" prefix omits the full stop at the +end, and its first word is not capitalized unless there is a reason to +capitalize it other than because it is the first word in the sentence. +E.g. "doc: clarify...", not "doc: Clarify...", or "githooks.txt: +improve...", not "githooks.txt: Improve...". But "refs: HEAD is also +treated as a ref" is correct, as we spell `HEAD` in all caps even when +it appears in the middle of a sentence. [[meaningful-message]] The body should provide a meaningful commit message, which: diff --git a/Documentation/config/advice.txt b/Documentation/config/advice.txt index acbd0c09aa..8b2849ff7b 100644 --- a/Documentation/config/advice.txt +++ b/Documentation/config/advice.txt @@ -119,4 +119,8 @@ advice.*:: addEmptyPathspec:: Advice shown if a user runs the add command without providing the pathspec parameter. + updateSparsePath:: + Advice shown when either linkgit:git-add[1] or linkgit:git-rm[1] + is asked to update index entries outside the current sparse + checkout. -- diff --git a/Documentation/config/blame.txt b/Documentation/config/blame.txt index 9468e8599c..4d047c1790 100644 --- a/Documentation/config/blame.txt +++ b/Documentation/config/blame.txt @@ -27,7 +27,7 @@ blame.ignoreRevsFile:: file names will reset the list of ignored revisions. This option will be handled before the command line option `--ignore-revs-file`. -blame.markUnblamables:: +blame.markUnblamableLines:: Mark lines that were changed by an ignored revision that we could not attribute to another commit with a '*' in the output of linkgit:git-blame[1]. diff --git a/Documentation/config/checkout.txt b/Documentation/config/checkout.txt index 2cddf7b4b4..bfbca90f0e 100644 --- a/Documentation/config/checkout.txt +++ b/Documentation/config/checkout.txt @@ -21,3 +21,24 @@ checkout.guess:: Provides the default value for the `--guess` or `--no-guess` option in `git checkout` and `git switch`. See linkgit:git-switch[1] and linkgit:git-checkout[1]. + +checkout.workers:: + The number of parallel workers to use when updating the working tree. + The default is one, i.e. sequential execution. If set to a value less + than one, Git will use as many workers as the number of logical cores + available. This setting and `checkout.thresholdForParallelism` affect + all commands that perform checkout. E.g. checkout, clone, reset, + sparse-checkout, etc. ++ +Note: parallel checkout usually delivers better performance for repositories +located on SSDs or over NFS. For repositories on spinning disks and/or machines +with a small number of cores, the default sequential checkout often performs +better. The size and compression level of a repository might also influence how +well the parallel version performs. + +checkout.thresholdForParallelism:: + When running parallel checkout with a small number of files, the cost + of subprocess spawning and inter-process communication might outweigh + the parallelization gains. This setting allows to define the minimum + number of files for which parallel checkout should be attempted. The + default is 100. diff --git a/Documentation/config/color.txt b/Documentation/config/color.txt index d5daacb13a..e05d520a86 100644 --- a/Documentation/config/color.txt +++ b/Documentation/config/color.txt @@ -127,8 +127,9 @@ color.interactive.<slot>:: interactive commands. color.pager:: - A boolean to enable/disable colored output when the pager is in - use (default is true). + A boolean to specify whether `auto` color modes should colorize + output going to the pager. Defaults to true; set this to false + if your pager does not understand ANSI color codes. color.push:: A boolean to enable/disable color in push errors. May be set to diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt index 7cb50b37e9..75f3a2d105 100644 --- a/Documentation/config/index.txt +++ b/Documentation/config/index.txt @@ -14,6 +14,11 @@ index.recordOffsetTable:: Defaults to 'true' if index.threads has been explicitly enabled, 'false' otherwise. +index.sparse:: + When enabled, write the index using sparse-directory entries. This + has no effect unless `core.sparseCheckout` and + `core.sparseCheckoutCone` are both enabled. Defaults to 'false'. + index.threads:: Specifies the number of threads to spawn when loading the index. This is meant to reduce index load time on multiprocessor machines. diff --git a/Documentation/config/log.txt b/Documentation/config/log.txt index 208d5fdcaa..456eb07800 100644 --- a/Documentation/config/log.txt +++ b/Documentation/config/log.txt @@ -24,6 +24,11 @@ log.excludeDecoration:: the config option can be overridden by the `--decorate-refs` option. +log.diffMerges:: + Set default diff format to be used for merge commits. See + `--diff-merges` in linkgit:git-log[1] for details. + Defaults to `separate`. + log.follow:: If `true`, `git log` will act as if the `--follow` option was used when a single <path> is given. This has the same limitations as `--follow`, diff --git a/Documentation/config/merge.txt b/Documentation/config/merge.txt index cb2ed58907..6b66c83eab 100644 --- a/Documentation/config/merge.txt +++ b/Documentation/config/merge.txt @@ -14,7 +14,7 @@ merge.defaultToUpstream:: branches at the remote named by `branch.<current branch>.remote` are consulted, and then they are mapped via `remote.<remote>.fetch` to their corresponding remote-tracking branches, and the tips of - these tracking branches are merged. + these tracking branches are merged. Defaults to true. merge.ff:: By default, Git does not create an extra merge commit when merging diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index c0844d8d8e..763f7af7c4 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -99,12 +99,23 @@ pack.packSizeLimit:: packing to a file when repacking, i.e. the git:// protocol is unaffected. It can be overridden by the `--max-pack-size` option of linkgit:git-repack[1]. Reaching this limit results - in the creation of multiple packfiles; which in turn prevents - bitmaps from being created. - The minimum size allowed is limited to 1 MiB. - The default is unlimited. - Common unit suffixes of 'k', 'm', or 'g' are - supported. + in the creation of multiple packfiles. ++ +Note that this option is rarely useful, and may result in a larger total +on-disk size (because Git will not store deltas between packs), as well +as worse runtime performance (object lookup within multiple packs is +slower than a single pack, and optimizations like reachability bitmaps +cannot cope with multiple packs). ++ +If you need to actively run Git using smaller packfiles (e.g., because your +filesystem does not support large files), this option may help. But if +your goal is to transmit a packfile over a medium that supports limited +sizes (e.g., removable media that cannot store the whole repository), +you are likely better off creating a single large packfile and splitting +it using a generic multi-volume archive tool (e.g., Unix `split`). ++ +The minimum size allowed is limited to 1 MiB. The default is unlimited. +Common unit suffixes of 'k', 'm', or 'g' are supported. pack.useBitmaps:: When true, git will use pack bitmaps (if available) when packing diff --git a/Documentation/config/push.txt b/Documentation/config/push.txt index 21b256e0a4..f2667b2689 100644 --- a/Documentation/config/push.txt +++ b/Documentation/config/push.txt @@ -120,3 +120,10 @@ push.useForceIfIncludes:: `--force-if-includes` as an option to linkgit:git-push[1] in the command line. Adding `--no-force-if-includes` at the time of push overrides this configuration setting. + +push.negotiate:: + If set to "true", attempt to reduce the size of the packfile + sent by rounds of negotiation in which the client and the + server attempt to find commits in common. If "false", Git will + rely solely on the server's ref advertisement to find commits + in common. diff --git a/Documentation/config/stash.txt b/Documentation/config/stash.txt index 413f907cba..9ed775281f 100644 --- a/Documentation/config/stash.txt +++ b/Documentation/config/stash.txt @@ -6,9 +6,9 @@ stash.useBuiltin:: remaining users that setting this now does nothing. stash.showIncludeUntracked:: - If this is set to true, the `git stash show` command without an - option will show the untracked files of a stash entry. Defaults to - false. See description of 'show' command in linkgit:git-stash[1]. + If this is set to true, the `git stash show` command will show + the untracked files of a stash entry. Defaults to false. See + description of 'show' command in linkgit:git-stash[1]. stash.showPatch:: If this is set to true, the `git stash show` command without an diff --git a/Documentation/config/uploadpack.txt b/Documentation/config/uploadpack.txt index b0d761282c..32fad5bbe8 100644 --- a/Documentation/config/uploadpack.txt +++ b/Documentation/config/uploadpack.txt @@ -59,15 +59,16 @@ uploadpack.allowFilter:: uploadpackfilter.allow:: Provides a default value for unspecified object filters (see: the - below configuration variable). + below configuration variable). If set to `true`, this will also + enable all filters which get added in the future. Defaults to `true`. uploadpackfilter.<filter>.allow:: Explicitly allow or ban the object filter corresponding to `<filter>`, where `<filter>` may be one of: `blob:none`, - `blob:limit`, `tree`, `sparse:oid`, or `combine`. If using - combined filters, both `combine` and all of the nested filter - kinds must be allowed. Defaults to `uploadpackfilter.allow`. + `blob:limit`, `object:type`, `tree`, `sparse:oid`, or `combine`. + If using combined filters, both `combine` and all of the nested + filter kinds must be allowed. Defaults to `uploadpackfilter.allow`. uploadpackfilter.tree.maxDepth:: Only allow `--filter=tree:<n>` when `<n>` is no more than the value of diff --git a/Documentation/diff-generate-patch.txt b/Documentation/diff-generate-patch.txt index 2db8eacc3e..c78063d4f7 100644 --- a/Documentation/diff-generate-patch.txt +++ b/Documentation/diff-generate-patch.txt @@ -11,7 +11,7 @@ linkgit:git-diff-files[1] with the `-p` option produces patch text. You can customize the creation of patch text via the `GIT_EXTERNAL_DIFF` and the `GIT_DIFF_OPTS` environment variables -(see linkgit:git[1]). +(see linkgit:git[1]), and the `diff` attribute (see linkgit:gitattributes[5]). What the -p option produces is slightly different from the traditional diff format: @@ -74,6 +74,11 @@ separate lines indicate the old and the new mode. rename from b rename to a +5. Hunk headers mention the name of the function to which the hunk + applies. See "Defining a custom hunk-header" in + linkgit:gitattributes[5] for details of how to tailor to this to + specific languages. + Combined diff format -------------------- diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index aa2b5c11f2..32e6dee5ac 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -34,7 +34,7 @@ endif::git-diff[] endif::git-format-patch[] ifdef::git-log[] ---diff-merges=(off|none|first-parent|1|separate|m|combined|c|dense-combined|cc):: +--diff-merges=(off|none|on|first-parent|1|separate|m|combined|c|dense-combined|cc):: --no-diff-merges:: Specify diff format to be used for merge commits. Default is {diff-merges-default} unless `--first-parent` is in use, in which case @@ -45,17 +45,24 @@ ifdef::git-log[] Disable output of diffs for merge commits. Useful to override implied value. + +--diff-merges=on::: +--diff-merges=m::: +-m::: + This option makes diff output for merge commits to be shown in + the default format. The default format could be changed using + `log.diffMerges` configuration parameter, which default value + is `separate`. `-m` implies `-p`. ++ --diff-merges=first-parent::: --diff-merges=1::: This option makes merge commits show the full diff with respect to the first parent only. + --diff-merges=separate::: ---diff-merges=m::: --m::: This makes merge commits show the full diff with respect to each of the parents. Separate log entry and diff is generated - for each parent. `-m` doesn't produce any output without `-p`. + for each parent. This is the format that `-m` produced + historically. + --diff-merges=combined::: --diff-merges=c::: @@ -293,11 +300,14 @@ explained for the configuration variable `core.quotePath` (see linkgit:git-config[1]). --name-only:: - Show only names of changed files. + Show only names of changed files. The file names are often encoded in UTF-8. + For more information see the discussion about encoding in the linkgit:git-log[1] + manual page. --name-status:: Show only names and status of changed files. See the description of the `--diff-filter` option on what the status letters mean. + Just like `--name-only` the file names are often encoded in UTF-8. --submodule[=<format>]:: Specify how differences in submodules are shown. When specifying diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt index 07783deee3..9e7b4e189c 100644 --- a/Documentation/fetch-options.txt +++ b/Documentation/fetch-options.txt @@ -110,6 +110,11 @@ ifndef::git-pull[] setting `fetch.writeCommitGraph`. endif::git-pull[] +--prefetch:: + Modify the configured refspec to place all refs into the + `refs/prefetch/` namespace. See the `prefetch` task in + linkgit:git-maintenance[1]. + -p:: --prune:: Before fetching, remove any remote-tracking references that no diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt index decd8ae122..8714dfcb76 100644 --- a/Documentation/git-am.txt +++ b/Documentation/git-am.txt @@ -15,6 +15,7 @@ SYNOPSIS [--whitespace=<option>] [-C<n>] [-p<n>] [--directory=<dir>] [--exclude=<path>] [--include=<path>] [--reject] [-q | --quiet] [--[no-]scissors] [-S[<keyid>]] [--patch-format=<format>] + [--quoted-cr=<action>] [(<mbox> | <Maildir>)...] 'git am' (--continue | --skip | --abort | --quit | --show-current-patch[=(diff|raw)]) @@ -59,6 +60,9 @@ OPTIONS --no-scissors:: Ignore scissors lines (see linkgit:git-mailinfo[1]). +--quoted-cr=<action>:: + This flag will be passed down to 'git mailinfo' (see linkgit:git-mailinfo[1]). + -m:: --message-id:: Pass the `-m` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]), diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt index 4b4cc5c5e8..5cddadafd2 100644 --- a/Documentation/git-config.txt +++ b/Documentation/git-config.txt @@ -340,6 +340,11 @@ GIT_CONFIG:: Using the "--global" option forces this to ~/.gitconfig. Using the "--system" option forces this to $(prefix)/etc/gitconfig. +GIT_CONFIG_GLOBAL:: +GIT_CONFIG_SYSTEM:: + Take the configuration from the given files instead from global or + system-level configuration. See linkgit:git[1] for details. + GIT_CONFIG_NOSYSTEM:: Whether to skip reading settings from the system-wide $(prefix)/etc/gitconfig file. See linkgit:git[1] for details. diff --git a/Documentation/git-credential.txt b/Documentation/git-credential.txt index 31c81c4c02..206e3c5f40 100644 --- a/Documentation/git-credential.txt +++ b/Documentation/git-credential.txt @@ -159,3 +159,7 @@ empty string. + Components which are missing from the URL (e.g., there is no username in the example above) will be left unset. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/git-cvsserver.txt b/Documentation/git-cvsserver.txt index 1b1c71ad9d..f2e4a47ebe 100644 --- a/Documentation/git-cvsserver.txt +++ b/Documentation/git-cvsserver.txt @@ -24,6 +24,18 @@ Usage: [verse] 'git-cvsserver' [<options>] [pserver|server] [<directory> ...] +DESCRIPTION +----------- + +This application is a CVS emulation layer for Git. + +It is highly functional. However, not all methods are implemented, +and for those methods that are implemented, +not all switches are implemented. + +Testing has been done using both the CLI CVS client, and the Eclipse CVS +plugin. Most functionality works fine with both of these clients. + OPTIONS ------- @@ -57,18 +69,6 @@ access still needs to be enabled by the `gitcvs.enabled` config option unless `--export-all` was given, too. -DESCRIPTION ------------ - -This application is a CVS emulation layer for Git. - -It is highly functional. However, not all methods are implemented, -and for those methods that are implemented, -not all switches are implemented. - -Testing has been done using both the CLI CVS client, and the Eclipse CVS -plugin. Most functionality works fine with both of these clients. - LIMITATIONS ----------- diff --git a/Documentation/git-describe.txt b/Documentation/git-describe.txt index a88f6ae2c6..c6a79c2a0f 100644 --- a/Documentation/git-describe.txt +++ b/Documentation/git-describe.txt @@ -63,9 +63,10 @@ OPTIONS Automatically implies --tags. --abbrev=<n>:: - Instead of using the default 7 hexadecimal digits as the - abbreviated object name, use <n> digits, or as many digits - as needed to form a unique object name. An <n> of 0 + Instead of using the default number of hexadecimal digits (which + will vary according to the number of objects in the repository with + a default of 7) of the abbreviated object name, use <n> digits, or + as many digits as needed to form a unique object name. An <n> of 0 will suppress long format, only showing the closest tag. --candidates=<n>:: @@ -139,8 +140,11 @@ at the end. The number of additional commits is the number of commits which would be displayed by "git log v1.0.4..parent". -The hash suffix is "-g" + unambiguous abbreviation for the tip commit -of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`). +The hash suffix is "-g" + an unambigous abbreviation for the tip commit +of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`). The +length of the abbreviation scales as the repository grows, using the +approximate number of objects in the repository and a bit of math +around the birthday paradox, and defaults to a minimum of 7. The "g" prefix stands for "git" and is used to allow describing the version of a software depending on the SCM the software is managed with. This is useful in an environment where people may use different SCMs. diff --git a/Documentation/git-format-patch.txt b/Documentation/git-format-patch.txt index 911da181a1..fe2f69d36e 100644 --- a/Documentation/git-format-patch.txt +++ b/Documentation/git-format-patch.txt @@ -36,7 +36,7 @@ SYNOPSIS DESCRIPTION ----------- -Prepare each commit with its "patch" in +Prepare each non-merge commit with its "patch" in one "message" per commit, formatted to resemble a UNIX mailbox. The output of this command is convenient for e-mail submission or for use with 'git am'. @@ -740,6 +740,14 @@ use it only when you know the recipient uses Git to apply your patch. $ git format-patch -3 ------------ +CAVEATS +------- + +Note that `format-patch` will omit merge commits from the output, even +if they are part of the requested range. A simple "patch" does not +include enough information for the receiving end to reproduce the same +merge commit. + SEE ALSO -------- linkgit:git-am[1], linkgit:git-send-email[1] diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index 4e0ba8234a..3d393fbac1 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -38,38 +38,6 @@ are lists of one or more search expressions separated by newline characters. An empty string as search expression matches all lines. -CONFIGURATION -------------- - -grep.lineNumber:: - If set to true, enable `-n` option by default. - -grep.column:: - If set to true, enable the `--column` option by default. - -grep.patternType:: - Set the default matching behavior. Using a value of 'basic', 'extended', - 'fixed', or 'perl' will enable the `--basic-regexp`, `--extended-regexp`, - `--fixed-strings`, or `--perl-regexp` option accordingly, while the - value 'default' will return to the default matching behavior. - -grep.extendedRegexp:: - If set to true, enable `--extended-regexp` option by default. This - option is ignored when the `grep.patternType` option is set to a value - other than 'default'. - -grep.threads:: - Number of grep worker threads to use. If unset (or set to 0), Git will - use as many threads as the number of logical cores available. - -grep.fullName:: - If set to true, enable `--full-name` option by default. - -grep.fallbackToNoIndex:: - If set to true, fall back to git grep --no-index if git grep - is executed outside of a git repository. Defaults to false. - - OPTIONS ------- --cached:: @@ -363,6 +331,38 @@ with multiple threads might perform slower than single threaded if `--textconv` is given and there're too many text conversions. So if you experience low performance in this case, it might be desirable to use `--threads=1`. +CONFIGURATION +------------- + +grep.lineNumber:: + If set to true, enable `-n` option by default. + +grep.column:: + If set to true, enable the `--column` option by default. + +grep.patternType:: + Set the default matching behavior. Using a value of 'basic', 'extended', + 'fixed', or 'perl' will enable the `--basic-regexp`, `--extended-regexp`, + `--fixed-strings`, or `--perl-regexp` option accordingly, while the + value 'default' will return to the default matching behavior. + +grep.extendedRegexp:: + If set to true, enable `--extended-regexp` option by default. This + option is ignored when the `grep.patternType` option is set to a value + other than 'default'. + +grep.threads:: + Number of grep worker threads to use. If unset (or set to 0), Git will + use as many threads as the number of logical cores available. + +grep.fullName:: + If set to true, enable `--full-name` option by default. + +grep.fallbackToNoIndex:: + If set to true, fall back to git grep --no-index if git grep + is executed outside of a git repository. Defaults to false. + + GIT --- Part of the linkgit:git[1] suite diff --git a/Documentation/git-interpret-trailers.txt b/Documentation/git-interpret-trailers.txt index 96ec6499f0..956a01d184 100644 --- a/Documentation/git-interpret-trailers.txt +++ b/Documentation/git-interpret-trailers.txt @@ -232,25 +232,38 @@ trailer.<token>.ifmissing:: that option for trailers with the specified <token>. trailer.<token>.command:: - This option can be used to specify a shell command that will - be called to automatically add or modify a trailer with the - specified <token>. + This option behaves in the same way as 'trailer.<token>.cmd', except + that it doesn't pass anything as argument to the specified command. + Instead the first occurrence of substring $ARG is replaced by the + value that would be passed as argument. + -When this option is specified, the behavior is as if a special -'<token>=<value>' argument were added at the beginning of the command -line, where <value> is taken to be the standard output of the -specified command with any leading and trailing whitespace trimmed -off. +The 'trailer.<token>.command' option has been deprecated in favor of +'trailer.<token>.cmd' due to the fact that $ARG in the user's command is +only replaced once and that the original way of replacing $ARG is not safe. + -If the command contains the `$ARG` string, this string will be -replaced with the <value> part of an existing trailer with the same -<token>, if any, before the command is launched. +When both 'trailer.<token>.cmd' and 'trailer.<token>.command' are given +for the same <token>, 'trailer.<token>.cmd' is used and +'trailer.<token>.command' is ignored. + +trailer.<token>.cmd:: + This option can be used to specify a shell command that will be called: + once to automatically add a trailer with the specified <token>, and then + each time a '--trailer <token>=<value>' argument to modify the <value> of + the trailer that this option would produce. + -If some '<token>=<value>' arguments are also passed on the command -line, when a 'trailer.<token>.command' is configured, the command will -also be executed for each of these arguments. And the <value> part of -these arguments, if any, will be used to replace the `$ARG` string in -the command. +When the specified command is first called to add a trailer +with the specified <token>, the behavior is as if a special +'--trailer <token>=<value>' argument was added at the beginning +of the "git interpret-trailers" command, where <value> +is taken to be the standard output of the command with any +leading and trailing whitespace trimmed off. ++ +If some '--trailer <token>=<value>' arguments are also passed +on the command line, the command is called again once for each +of these arguments with the same <token>. And the <value> part +of these arguments, if any, will be passed to the command as its +first argument. This way the command can produce a <value> computed +from the <value> passed in the '--trailer <token>=<value>' argument. EXAMPLES -------- @@ -333,6 +346,55 @@ subject Fix #42 ------------ +* Configure a 'help' trailer with a cmd use a script `glog-find-author` + which search specified author identity from git log in git repository + and show how it works: ++ +------------ +$ cat ~/bin/glog-find-author +#!/bin/sh +test -n "$1" && git log --author="$1" --pretty="%an <%ae>" -1 || true +$ git config trailer.help.key "Helped-by: " +$ git config trailer.help.ifExists "addIfDifferentNeighbor" +$ git config trailer.help.cmd "~/bin/glog-find-author" +$ git interpret-trailers --trailer="help:Junio" --trailer="help:Couder" <<EOF +> subject +> +> message +> +> EOF +subject + +message + +Helped-by: Junio C Hamano <gitster@pobox.com> +Helped-by: Christian Couder <christian.couder@gmail.com> +------------ + +* Configure a 'ref' trailer with a cmd use a script `glog-grep` + to grep last relevant commit from git log in the git repository + and show how it works: ++ +------------ +$ cat ~/bin/glog-grep +#!/bin/sh +test -n "$1" && git log --grep "$1" --pretty=reference -1 || true +$ git config trailer.ref.key "Reference-to: " +$ git config trailer.ref.ifExists "replace" +$ git config trailer.ref.cmd "~/bin/glog-grep" +$ git interpret-trailers --trailer="ref:Add copyright notices." <<EOF +> subject +> +> message +> +> EOF +subject + +message + +Reference-to: 8bc9a0c769 (Add copyright notices., 2005-04-07) +------------ + * Configure a 'see' trailer with a command to show the subject of a commit that is related, and show how it works: + diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt index d343f040f5..3fcfd965fd 100644 --- a/Documentation/git-mailinfo.txt +++ b/Documentation/git-mailinfo.txt @@ -9,7 +9,9 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message SYNOPSIS -------- [verse] -'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch> +'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] + [--[no-]scissors] [--quoted-cr=<action>] + <msg> <patch> DESCRIPTION @@ -89,6 +91,23 @@ This can be enabled by default with the configuration option mailinfo.scissors. --no-scissors:: Ignore scissors lines. Useful for overriding mailinfo.scissors settings. +--quoted-cr=<action>:: + Action when processes email messages sent with base64 or + quoted-printable encoding, and the decoded lines end with a CRLF + instead of a simple LF. ++ +The valid actions are: ++ +-- +* `nowarn`: Git will do nothing when such a CRLF is found. +* `warn`: Git will issue a warning for each message if such a CRLF is + found. +* `strip`: Git will convert those CRLF to LF. +-- ++ +The default action could be set by configuration option `mailinfo.quotedCR`. +If no such configuration option has been set, `warn` will be used. + <msg>:: The commit log message extracted from e-mail, usually except the title line which comes from e-mail Subject. diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt index 80ddd33ceb..1e738ad398 100644 --- a/Documentation/git-maintenance.txt +++ b/Documentation/git-maintenance.txt @@ -92,10 +92,8 @@ commit-graph:: prefetch:: The `prefetch` task updates the object directory with the latest objects from all registered remotes. For each remote, a `git fetch` - command is run. The refmap is custom to avoid updating local or remote - branches (those in `refs/heads` or `refs/remotes`). Instead, the - remote refs are stored in `refs/prefetch/<remote>/`. Also, tags are - not updated. + command is run. The configured refspec is modified to place all + requested refs within `refs/prefetch/`. Also, tags are not updated. + This is done to avoid disrupting the remote-tracking branches. The end users expect these refs to stay unmoved unless they initiate a fetch. With prefetch diff --git a/Documentation/git-mktag.txt b/Documentation/git-mktag.txt index 17a2603a60..466a697519 100644 --- a/Documentation/git-mktag.txt +++ b/Documentation/git-mktag.txt @@ -11,14 +11,6 @@ SYNOPSIS [verse] 'git mktag' -OPTIONS -------- - ---strict:: - By default mktag turns on the equivalent of - linkgit:git-fsck[1] `--strict` mode. Use `--no-strict` to - disable it. - DESCRIPTION ----------- @@ -45,6 +37,14 @@ the appropriate `fsck.<msg-id>` varible: git -c fsck.extraHeaderEntry=ignore mktag <my-tag-with-headers +OPTIONS +------- + +--strict:: + By default mktag turns on the equivalent of + linkgit:git-fsck[1] `--strict` mode. Use `--no-strict` to + disable it. + Tag Format ---------- A tag signature file, to be fed to this command's standard input, diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt index f89e68b424..38e5257b2a 100644 --- a/Documentation/git-p4.txt +++ b/Documentation/git-p4.txt @@ -762,3 +762,7 @@ IMPLEMENTATION DETAILS message indicating the p4 depot location and change number. This line is used by later 'git p4 sync' operations to know which p4 changes are new. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index 25d9fbe37a..dbfd1f9017 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -128,10 +128,10 @@ depth is 4095. into multiple independent packfiles, each not larger than the given size. The size can be suffixed with "k", "m", or "g". The minimum size allowed is limited to 1 MiB. - This option - prevents the creation of a bitmap index. The default is unlimited, unless the config variable - `pack.packSizeLimit` is set. + `pack.packSizeLimit` is set. Note that this option may result in + a larger and slower repository; see the discussion in + `pack.packSizeLimit`. --honor-pack-keep:: This flag causes an object already in a local pack that diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt index f08ae27e2a..55af6fd24e 100644 --- a/Documentation/git-rebase.txt +++ b/Documentation/git-rebase.txt @@ -200,12 +200,6 @@ Alternatively, you can undo the 'git rebase' with git rebase --abort -CONFIGURATION -------------- - -include::config/rebase.txt[] -include::config/sequencer.txt[] - OPTIONS ------- --onto <newbase>:: @@ -623,6 +617,14 @@ See also INCOMPATIBLE OPTIONS below. --no-reschedule-failed-exec:: Automatically reschedule `exec` commands that failed. This only makes sense in interactive mode (or when an `--exec` option was provided). ++ +Even though this option applies once a rebase is started, it's set for +the whole rebase at the start based on either the +`rebase.rescheduleFailedExec` configuration (see linkgit:git-config[1] +or "CONFIGURATION" below) or whether this option is +provided. Otherwise an explicit `--no-reschedule-failed-exec` at the +start would be overridden by the presence of +`rebase.rescheduleFailedExec=true` configuration. INCOMPATIBLE OPTIONS -------------------- @@ -1266,6 +1268,12 @@ merge tlsv1.3 merge cmake ------------ +CONFIGURATION +------------- + +include::config/rebase.txt[] +include::config/sequencer.txt[] + BUGS ---- The todo list presented by the deprecated `--preserve-merges --interactive` diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index 317d63cf0d..24c00c9384 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -121,7 +121,9 @@ depth is 4095. If specified, multiple packfiles may be created, which also prevents the creation of a bitmap index. The default is unlimited, unless the config variable - `pack.packSizeLimit` is set. + `pack.packSizeLimit` is set. Note that this option may result in + a larger and slower repository; see the discussion in + `pack.packSizeLimit`. -b:: --write-bitmap-index:: @@ -186,7 +188,7 @@ When `--unpacked` is specified, loose objects are implicitly included in this "roll-up", without respect to their reachability. This is subject to change in the future. This option (implying a drastically different repack mode) is not guaranteed to work with all other combinations of -option to `git repack`). +option to `git repack`. CONFIGURATION ------------- diff --git a/Documentation/git-rm.txt b/Documentation/git-rm.txt index ab750367fd..26e9b28470 100644 --- a/Documentation/git-rm.txt +++ b/Documentation/git-rm.txt @@ -23,7 +23,9 @@ branch, and no updates to their contents can be staged in the index, though that default behavior can be overridden with the `-f` option. When `--cached` is given, the staged content has to match either the tip of the branch or the file on disk, -allowing the file to be removed from just the index. +allowing the file to be removed from just the index. When +sparse-checkouts are in use (see linkgit:git-sparse-checkout[1]), +`git rm` will only remove paths within the sparse-checkout patterns. OPTIONS diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt index 93708aefea..3db4eab4ba 100644 --- a/Documentation/git-send-email.txt +++ b/Documentation/git-send-email.txt @@ -167,6 +167,14 @@ Sending `sendemail.envelopeSender` configuration variable; if that is unspecified, choosing the envelope sender is left to your MTA. +--sendmail-cmd=<command>:: + Specify a command to run to send the email. The command should + be sendmail-like; specifically, it must support the `-i` option. + The command will be executed in the shell if necessary. Default + is the value of `sendemail.sendmailcmd`. If unspecified, and if + --smtp-server is also unspecified, git-send-email will search + for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH. + --smtp-encryption=<encryption>:: Specify the encryption to use, either 'ssl' or 'tls'. Any other value reverts to plain SMTP. Default is the value of @@ -211,13 +219,16 @@ a password is obtained using 'git-credential'. --smtp-server=<host>:: If set, specifies the outgoing SMTP server to use (e.g. - `smtp.example.com` or a raw IP address). Alternatively it can - specify a full pathname of a sendmail-like program instead; - the program must support the `-i` option. Default value can - be specified by the `sendemail.smtpServer` configuration - option; the built-in default is to search for `sendmail` in - `/usr/sbin`, `/usr/lib` and $PATH if such program is - available, falling back to `localhost` otherwise. + `smtp.example.com` or a raw IP address). If unspecified, and if + `--sendmail-cmd` is also unspecified, the default is to search + for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH if such a + program is available, falling back to `localhost` otherwise. ++ +For backward compatibility, this option can also specify a full pathname +of a sendmail-like program instead; the program must support the `-i` +option. This method does not support passing arguments or using plain +command names. For those use cases, consider using `--sendmail-cmd` +instead. --smtp-server-port=<port>:: Specifies a port different from the default port (SMTP diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index a0eeaeb02e..fdcf43f87c 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -45,6 +45,20 @@ To avoid interfering with other worktrees, it first enables the When `--cone` is provided, the `core.sparseCheckoutCone` setting is also set, allowing for better performance with a limited set of patterns (see 'CONE PATTERN SET' below). ++ +Use the `--[no-]sparse-index` option to toggle the use of the sparse +index format. This reduces the size of the index to be more closely +aligned with your sparse-checkout definition. This can have significant +performance advantages for commands such as `git status` or `git add`. +This feature is still experimental. Some commands might be slower with +a sparse index until they are properly integrated with the feature. ++ +**WARNING:** Using a sparse index requires modifying the index in a way +that is not completely understood by external tools. If you have trouble +with this compatibility, then run `git sparse-checkout init --no-sparse-index` +to rewrite your index to not be sparse. Older versions of Git will not +understand the sparse directory entries index extension and may fail to +interact with your repository until it is disabled. 'set':: Write a set of patterns to the sparse-checkout file, as given as diff --git a/Documentation/git-stash.txt b/Documentation/git-stash.txt index a8c8c32f1e..be6084ccef 100644 --- a/Documentation/git-stash.txt +++ b/Documentation/git-stash.txt @@ -91,8 +91,10 @@ show [-u|--include-untracked|--only-untracked] [<diff-options>] [<stash>]:: By default, the command shows the diffstat, but it will accept any format known to 'git diff' (e.g., `git stash show -p stash@{1}` to view the second most recent entry in patch form). - You can use stash.showIncludeUntracked, stash.showStat, and - stash.showPatch config variables to change the default behavior. + If no `<diff-option>` is provided, the default behavior will be given + by the `stash.showStat`, and `stash.showPatch` config variables. You + can also use `stash.showIncludeUntracked` to set whether + `--include-untracked` is enabled by default. pop [--index] [-q|--quiet] [<stash>]:: diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt index 67b143cc81..d5776ffcfd 100644 --- a/Documentation/git-svn.txt +++ b/Documentation/git-svn.txt @@ -1061,25 +1061,6 @@ with different name spaces. For example: branches = stable/*:refs/remotes/svn/stable/* branches = debug/*:refs/remotes/svn/debug/* -BUGS ----- - -We ignore all SVN properties except svn:executable. Any unhandled -properties are logged to $GIT_DIR/svn/<refname>/unhandled.log - -Renamed and copied directories are not detected by Git and hence not -tracked when committing to SVN. I do not plan on adding support for -this as it's quite difficult and time-consuming to get working for all -the possible corner cases (Git doesn't do it, either). Committing -renamed and copied files is fully supported if they're similar enough -for Git to detect them. - -In SVN, it is possible (though discouraged) to commit changes to a tag -(because a tag is just a directory copy, thus technically the same as a -branch). When cloning an SVN repository, 'git svn' cannot know if such a -commit to a tag will happen in the future. Thus it acts conservatively -and imports all SVN tags as branches, prefixing the tag name with 'tags/'. - CONFIGURATION ------------- @@ -1166,6 +1147,25 @@ $GIT_DIR/svn/\**/.rev_map.*:: if it is missing or not up to date. 'git svn reset' automatically rewinds it. +BUGS +---- + +We ignore all SVN properties except svn:executable. Any unhandled +properties are logged to $GIT_DIR/svn/<refname>/unhandled.log + +Renamed and copied directories are not detected by Git and hence not +tracked when committing to SVN. I do not plan on adding support for +this as it's quite difficult and time-consuming to get working for all +the possible corner cases (Git doesn't do it, either). Committing +renamed and copied files is fully supported if they're similar enough +for Git to detect them. + +In SVN, it is possible (though discouraged) to commit changes to a tag +(because a tag is just a directory copy, thus technically the same as a +branch). When cloning an SVN repository, 'git svn' cannot know if such a +commit to a tag will happen in the future. Thus it acts conservatively +and imports all SVN tags as branches, prefixing the tag name with 'tags/'. + SEE ALSO -------- linkgit:git-rebase[1] diff --git a/Documentation/git-worktree.txt b/Documentation/git-worktree.txt index f1bb1fa5f5..66e67e6cbf 100644 --- a/Documentation/git-worktree.txt +++ b/Documentation/git-worktree.txt @@ -387,7 +387,7 @@ These annotations are: ------------ $ git worktree list /path/to/linked-worktree abcd1234 [master] -/path/to/locked-worktreee acbd5678 (brancha) locked +/path/to/locked-worktree acbd5678 (brancha) locked /path/to/prunable-worktree 5678abc (detached HEAD) prunable ------------ diff --git a/Documentation/git.txt b/Documentation/git.txt index 3a9c44987f..6dd241ef83 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -13,7 +13,7 @@ SYNOPSIS [--exec-path[=<path>]] [--html-path] [--man-path] [--info-path] [-p|--paginate|-P|--no-pager] [--no-replace-objects] [--bare] [--git-dir=<path>] [--work-tree=<path>] [--namespace=<name>] - [--super-prefix=<path>] [--config-env <name>=<envvar>] + [--super-prefix=<path>] [--config-env=<name>=<envvar>] <command> [<args>] DESCRIPTION @@ -670,6 +670,16 @@ for further details. If this environment variable is set to `0`, git will not prompt on the terminal (e.g., when asking for HTTP authentication). +`GIT_CONFIG_GLOBAL`:: +`GIT_CONFIG_SYSTEM`:: + Take the configuration from the given files instead from global or + system-level configuration files. If `GIT_CONFIG_SYSTEM` is set, the + system config file defined at build time (usually `/etc/gitconfig`) + will not be read. Likewise, if `GIT_CONFIG_GLOBAL` is set, neither + `$HOME/.gitconfig` nor `$XDG_CONFIG_HOME/git/config` will be read. Can + be set to `/dev/null` to skip reading configuration files of the + respective level. + `GIT_CONFIG_NOSYSTEM`:: Whether to skip reading settings from the system-wide `$(prefix)/etc/gitconfig` file. This environment variable can diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 0a60472bb5..83fd4e19a4 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -845,6 +845,8 @@ patterns are available: - `rust` suitable for source code in the Rust language. +- `scheme` suitable for source code in the Scheme language. + - `tex` suitable for source code for LaTeX documents. @@ -1245,6 +1247,12 @@ to: [attr]binary -diff -merge -text ------------ +NOTES +----- + +Git does not follow symbolic links when accessing a `.gitattributes` +file in the working tree. This keeps behavior consistent when the file +is accessed from the index or a tree versus from the filesystem. EXAMPLES -------- diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index 5751603b13..53e7d5c914 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -149,6 +149,10 @@ not tracked by Git remain untracked. To stop tracking a file that is currently tracked, use 'git rm --cached'. +Git does not follow symbolic links when accessing a `.gitignore` file in +the working tree. This keeps behavior consistent when the file is +accessed from the index or a tree versus from the filesystem. + EXAMPLES -------- diff --git a/Documentation/gitmailmap.txt b/Documentation/gitmailmap.txt index 3fb39f801f..06f4af93fe 100644 --- a/Documentation/gitmailmap.txt +++ b/Documentation/gitmailmap.txt @@ -55,6 +55,13 @@ this would also match the 'Commit Name <commit@email.xx>' above: Proper Name <proper@email.xx> CoMmIt NaMe <CoMmIt@EmAiL.xX> -- +NOTES +----- + +Git does not follow symbolic links when accessing a `.mailmap` file in +the working tree. This keeps behavior consistent when the file is +accessed from the index or a tree versus from the filesystem. + EXAMPLES -------- diff --git a/Documentation/gitmodules.txt b/Documentation/gitmodules.txt index 8e333dde1b..dcee09b500 100644 --- a/Documentation/gitmodules.txt +++ b/Documentation/gitmodules.txt @@ -98,6 +98,14 @@ submodule.<name>.shallow:: shallow clone (with a history depth of 1) unless the user explicitly asks for a non-shallow clone. +NOTES +----- + +Git does not allow the `.gitmodules` file within a working tree to be a +symbolic link, and will refuse to check out such a tree entry. This +keeps behavior consistent when the file is accessed from the index or a +tree versus from the filesystem, and helps Git reliably enforce security +checks of the file contents. EXAMPLES -------- diff --git a/Documentation/gitnamespaces.txt b/Documentation/gitnamespaces.txt index b614969ad2..1c8d2ecc35 100644 --- a/Documentation/gitnamespaces.txt +++ b/Documentation/gitnamespaces.txt @@ -62,3 +62,7 @@ git clone ext::'git --namespace=foo %s /tmp/prefixed.git' ---------- include::transfer-data-leaks.txt[] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt index 67c7a50b96..c077971335 100644 --- a/Documentation/glossary-content.txt +++ b/Documentation/glossary-content.txt @@ -146,8 +146,8 @@ current branch integrates with) obviously do not work, as there is no <<def_revision,revision>> and you are "merging" another <<def_branch,branch>>'s changes that happen to be a descendant of what you have. In such a case, you do not make a new <<def_merge,merge>> - <<def_commit,commit>> but instead just update to his - revision. This will happen frequently on a + <<def_commit,commit>> but instead just update your branch to point at the same + revision as the branch you are merging. This will happen frequently on a <<def_remote_tracking_branch,remote-tracking branch>> of a remote <<def_repository,repository>>. diff --git a/Documentation/lint-gitlink.perl b/Documentation/lint-gitlink.perl index 476cc30b83..b22a367844 100755 --- a/Documentation/lint-gitlink.perl +++ b/Documentation/lint-gitlink.perl @@ -1,71 +1,67 @@ #!/usr/bin/perl -use File::Find; -use Getopt::Long; +use strict; +use warnings; -my $basedir = "."; -GetOptions("basedir=s" => \$basedir) - or die("Cannot parse command line arguments\n"); +# Parse arguments, a simple state machine for input like: +# +# howto/*.txt config/*.txt --section=1 git.txt git-add.txt [...] --to-lint git-add.txt a-file.txt [...] +my %TXT; +my %SECTION; +my $section; +my $lint_these = 0; +for my $arg (@ARGV) { + if (my ($sec) = $arg =~ /^--section=(\d+)$/s) { + $section = $sec; + next; + } -my $found_errors = 0; + my ($name) = $arg =~ /^(.*?)\.txt$/s; + unless (defined $section) { + $TXT{$name} = $arg; + next; + } -sub report { - my ($where, $what, $error) = @_; - print "$where: $error: $what\n"; - $found_errors = 1; + $SECTION{$name} = $section; } -sub grab_section { - my ($page) = @_; - open my $fh, "<", "$basedir/$page.txt"; - my $firstline = <$fh>; - chomp $firstline; - close $fh; - my ($section) = ($firstline =~ /.*\((\d)\)$/); - return $section; +my $exit_code = 0; +sub report { + my ($pos, $line, $target, $msg) = @_; + substr($line, $pos) = "' <-- HERE"; + $line =~ s/^\s+//; + print "$ARGV:$.: error: $target: $msg, shown with 'HERE' below:\n"; + print "$ARGV:$.:\t'$line\n"; + $exit_code = 1; } -sub lint { - my ($file) = @_; - open my $fh, "<", $file - or return; - while (<$fh>) { - my $where = "$file:$."; - while (s/linkgit:((.*?)\[(\d)\])//) { - my ($target, $page, $section) = ($1, $2, $3); +@ARGV = sort values %TXT; +die "BUG: Nothing to process!" unless @ARGV; +while (<>) { + my $line = $_; + while ($line =~ m/linkgit:((.*?)\[(\d)\])/g) { + my $pos = pos $line; + my ($target, $page, $section) = ($1, $2, $3); - # De-AsciiDoc - $page =~ s/{litdd}/--/g; + # De-AsciiDoc + $page =~ s/{litdd}/--/g; - if ($page !~ /^git/) { - report($where, $target, "nongit link"); - next; - } - if (! -f "$basedir/$page.txt") { - report($where, $target, "no such source"); - next; - } - $real_section = grab_section($page); - if ($real_section != $section) { - report($where, $target, - "wrong section (should be $real_section)"); - next; - } + if (!exists $TXT{$page}) { + report($pos, $line, $target, "link outside of our own docs"); + next; + } + if (!exists $SECTION{$page}) { + report($pos, $line, $target, "link outside of our sectioned docs"); + next; + } + my $real_section = $SECTION{$page}; + if ($section != $SECTION{$page}) { + report($pos, $line, $target, "wrong section (should be $real_section)"); + next; } } - close $fh; -} - -sub lint_it { - lint($File::Find::name) if -f && /\.txt$/; -} - -if (!@ARGV) { - find({ wanted => \&lint_it, no_chdir => 1 }, $basedir); -} else { - for (@ARGV) { - lint($_); - } + # this resets our $. for each file + close ARGV if eof; } -exit $found_errors; +exit $exit_code; diff --git a/Documentation/lint-man-end-blurb.perl b/Documentation/lint-man-end-blurb.perl new file mode 100755 index 0000000000..d69312e5db --- /dev/null +++ b/Documentation/lint-man-end-blurb.perl @@ -0,0 +1,24 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +my $exit_code = 0; +sub report { + my ($target, $msg) = @_; + print "error: $target: $msg\n"; + $exit_code = 1; +} + +local $/; +while (my $slurp = <>) { + report($ARGV, "has no 'Part of the linkgit:git[1] suite' end blurb") + unless $slurp =~ m[ + ^GIT\n + ---\n + \QPart of the linkgit:git[1] suite\E \n + \z + ]mx; +} + +exit $exit_code; diff --git a/Documentation/lint-man-section-order.perl b/Documentation/lint-man-section-order.perl new file mode 100755 index 0000000000..b05f9156dd --- /dev/null +++ b/Documentation/lint-man-section-order.perl @@ -0,0 +1,105 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +my %SECTIONS; +{ + my $order = 0; + %SECTIONS = ( + 'NAME' => { + required => 1, + order => $order++, + }, + 'SYNOPSIS' => { + required => 1, + order => $order++, + }, + 'DESCRIPTION' => { + required => 1, + order => $order++, + }, + 'OPTIONS' => { + order => $order++, + required => 0, + }, + 'CONFIGURATION' => { + order => $order++, + }, + 'BUGS' => { + order => $order++, + }, + 'SEE ALSO' => { + order => $order++, + }, + 'GIT' => { + required => 1, + order => $order++, + }, + ); +} +my $SECTION_RX = do { + my ($names) = join "|", keys %SECTIONS; + qr/^($names)$/s; +}; + +my $exit_code = 0; +sub report { + my ($msg) = @_; + print "$ARGV:$.: $msg\n"; + $exit_code = 1; +} + +my $last_was_section; +my @actual_order; +while (my $line = <>) { + chomp $line; + if ($line =~ $SECTION_RX) { + push @actual_order => $line; + $last_was_section = 1; + # Have no "last" section yet, processing NAME + next if @actual_order == 1; + + my @expected_order = sort { + $SECTIONS{$a}->{order} <=> $SECTIONS{$b}->{order} + } @actual_order; + + my $expected_last = $expected_order[-2]; + my $actual_last = $actual_order[-2]; + if ($actual_last ne $expected_last) { + report("section '$line' incorrectly ordered, comes after '$actual_last'"); + } + next; + } + if ($last_was_section) { + my $last_section = $actual_order[-1]; + if (length $last_section ne length $line) { + report("dashes under '$last_section' should match its length!"); + } + if ($line !~ /^-+$/) { + report("dashes under '$last_section' should be '-' dashes!"); + } + $last_was_section = 0; + } + + if (eof) { + # We have both a hash and an array to consider, for + # convenience + my %actual_sections; + @actual_sections{@actual_order} = (); + + for my $section (sort keys %SECTIONS) { + next if !$SECTIONS{$section}->{required} or exists $actual_sections{$section}; + report("has no required '$section' section!"); + } + + # Reset per-file state + { + @actual_order = (); + # this resets our $. for each file + close ARGV; + } + } +} + +exit $exit_code; diff --git a/Documentation/pretty-formats.txt b/Documentation/pretty-formats.txt index 45133066e4..ef6bd420ae 100644 --- a/Documentation/pretty-formats.txt +++ b/Documentation/pretty-formats.txt @@ -190,6 +190,8 @@ The placeholders are: '%ai':: author date, ISO 8601-like format '%aI':: author date, strict ISO 8601 format '%as':: author date, short format (`YYYY-MM-DD`) +'%ah':: author date, human style (like the `--date=human` option of + linkgit:git-rev-list[1]) '%cn':: committer name '%cN':: committer name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) @@ -206,6 +208,8 @@ The placeholders are: '%ci':: committer date, ISO 8601-like format '%cI':: committer date, strict ISO 8601 format '%cs':: committer date, short format (`YYYY-MM-DD`) +'%ch':: committer date, human style (like the `--date=human` option of + linkgit:git-rev-list[1]) '%d':: ref names, like the --decorate option of linkgit:git-log[1] '%D':: ref names without the " (", ")" wrapping. '%(describe[:options])':: human-readable name, like @@ -267,7 +271,7 @@ endif::git-rev-list[] `trailers` string may be followed by a colon and zero or more comma-separated options. If any option is provided multiple times the - last occurance wins. + last occurrence wins. + The boolean options accept an optional value `[=<BOOL>]`. The values `true`, `false`, `on`, `off` etc. are all accepted. See the "boolean" diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index b1c8f86c6e..5bf2a85f69 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -892,6 +892,9 @@ or units. n may be zero. The suffixes k, m, and g can be used to name units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same as 'blob:limit=1024'. + +The form '--filter=object:type=(tag|commit|tree|blob)' omits all objects +which are not of the requested type. ++ The form '--filter=sparse:oid=<blob-ish>' uses a sparse-checkout specification contained in the blob (or blob-expression) '<blob-ish>' to omit blobs that would not be not required for a sparse checkout on @@ -930,6 +933,11 @@ equivalent. --no-filter:: Turn off any previous `--filter=` argument. +--filter-provided-objects:: + Filter the list of explicitly provided objects, which would otherwise + always be printed even if they did not match any of the filters. Only + useful with `--filter=`. + --filter-print-omitted:: Only useful with `--filter=`; prints a list of the objects omitted by the filter. Object IDs are prefixed with a ``~'' character. diff --git a/Documentation/revisions.txt b/Documentation/revisions.txt index d9169c062e..f5f17b65a1 100644 --- a/Documentation/revisions.txt +++ b/Documentation/revisions.txt @@ -260,6 +260,9 @@ any of the given commits. A commit's reachable set is the commit itself and the commits in its ancestry chain. +There are several notations to specify a set of connected commits +(called a "revision range"), illustrated below. + Commit Exclusions ~~~~~~~~~~~~~~~~~ @@ -294,6 +297,26 @@ is a shorthand for 'HEAD..origin' and asks "What did the origin do since I forked from them?" Note that '..' would mean 'HEAD..HEAD' which is an empty range that is both reachable and unreachable from HEAD. +Commands that are specifically designed to take two distinct ranges +(e.g. "git range-diff R1 R2" to compare two ranges) do exist, but +they are exceptions. Unless otherwise noted, all "git" commands +that operate on a set of commits work on a single revision range. +In other words, writing two "two-dot range notation" next to each +other, e.g. + + $ git log A..B C..D + +does *not* specify two revision ranges for most commands. Instead +it will name a single connected set of commits, i.e. those that are +reachable from either B or D but are reachable from neither A or C. +In a linear history like this: + + ---A---B---o---o---C---D + +because A and B are reachable from C, the revision range specified +by these two dotted ranges is a single commit D. + + Other <rev>{caret} Parent Shorthand Notations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Three other shorthands exist, particularly useful for merge commits, diff --git a/Documentation/technical/api-error-handling.txt b/Documentation/technical/api-error-handling.txt index ceeedd485c..8be4f4d0d6 100644 --- a/Documentation/technical/api-error-handling.txt +++ b/Documentation/technical/api-error-handling.txt @@ -1,8 +1,11 @@ Error reporting in git ====================== -`die`, `usage`, `error`, and `warning` report errors of various -kinds. +`BUG`, `die`, `usage`, `error`, and `warning` report errors of +various kinds. + +- `BUG` is for failed internal assertions that should never happen, + i.e. a bug in git itself. - `die` is for fatal application errors. It prints a message to the user and exits with status 128. @@ -20,6 +23,9 @@ kinds. without running into too many problems. Like `error`, it returns -1 after reporting the situation to the caller. +These reports will be logged via the trace2 facility. See the "error" +event in link:api-trace2.txt[trace2 API]. + Customizable error handlers --------------------------- diff --git a/Documentation/technical/api-trace2.txt b/Documentation/technical/api-trace2.txt index c65ffafc48..037a91cbca 100644 --- a/Documentation/technical/api-trace2.txt +++ b/Documentation/technical/api-trace2.txt @@ -396,14 +396,14 @@ only present on the "start" and "atexit" events. } ------------ -`"discard"`:: +`"too_many_files"`:: This event is written to the git-trace2-discard sentinel file if there are too many files in the target trace directory (see the trace2.maxFiles config option). + ------------ { - "event":"discard", + "event":"too_many_files", ... } ------------ @@ -465,7 +465,7 @@ completed.) ------------ `"error"`:: - This event is emitted when one of the `error()`, `die()`, + This event is emitted when one of the `BUG()`, `error()`, `die()`, `warning()`, or `usage()` functions are called. + ------------ diff --git a/Documentation/technical/hash-function-transition.txt b/Documentation/technical/hash-function-transition.txt index 7c1630bf83..260224b033 100644 --- a/Documentation/technical/hash-function-transition.txt +++ b/Documentation/technical/hash-function-transition.txt @@ -599,7 +599,7 @@ supports four different modes of operation: convert any object names written to output to SHA-1, but store objects using SHA-256. This allows users to test the code with no visible behavior change except for performance. This allows - allows running even tests that assume the SHA-1 hash function, to + running even tests that assume the SHA-1 hash function, to sanity-check the behavior of the new mode. 2. ("early transition") Allow both SHA-1 and SHA-256 object names in diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index d363a71c37..65da0daaa5 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -44,6 +44,13 @@ Git index format localization, no special casing of directory separator '/'). Entries with the same name are sorted by their stage field. + An index entry typically represents a file. However, if sparse-checkout + is enabled in cone mode (`core.sparseCheckoutCone` is enabled) and the + `extensions.sparseIndex` extension is enabled, then the index may + contain entries for directories outside of the sparse-checkout definition. + These entries have mode `040000`, include the `SKIP_WORKTREE` bit, and + the path ends in a directory separator. + 32-bit ctime seconds, the last time a file's metadata changed this is stat(2) data @@ -385,3 +392,15 @@ The remaining data of each directory block is grouped by type: in this block of entries. - 32-bit count of cache entries in this block + +== Sparse Directory Entries + + When using sparse-checkout in cone mode, some entire directories within + the index can be summarized by pointing to a tree object instead of the + entire expanded list of paths within that tree. An index containing such + entries is a "sparse index". Index format versions 4 and less were not + implemented with such entries in mind. Thus, for these versions, an + index containing sparse directory entries will include this extension + with signature { 's', 'd', 'i', 'r' }. Like the split-index extension, + tools should avoid interacting with a sparse index unless they understand + this extension. diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt index f7eabc6c76..1eb525fe76 100644 --- a/Documentation/technical/packfile-uri.txt +++ b/Documentation/technical/packfile-uri.txt @@ -35,13 +35,14 @@ include some sort of non-trivial implementation in the Minimum Viable Product, at least so that we can test the client. This is the implementation: a feature, marked experimental, that allows the -server to be configured by one or more `uploadpack.blobPackfileUri=<sha1> -<uri>` entries. Whenever the list of objects to be sent is assembled, all such -blobs are excluded, replaced with URIs. As noted in "Future work" below, the -server can evolve in the future to support excluding other objects (or other -implementations of servers could be made that support excluding other objects) -without needing a protocol change, so clients should not expect that packfiles -downloaded in this way only contain single blobs. +server to be configured by one or more `uploadpack.blobPackfileUri= +<object-hash> <pack-hash> <uri>` entries. Whenever the list of objects to be +sent is assembled, all such blobs are excluded, replaced with URIs. As noted +in "Future work" below, the server can evolve in the future to support +excluding other objects (or other implementations of servers could be made +that support excluding other objects) without needing a protocol change, so +clients should not expect that packfiles downloaded in this way only contain +single blobs. Client design ------------- diff --git a/Documentation/technical/parallel-checkout.txt b/Documentation/technical/parallel-checkout.txt new file mode 100644 index 0000000000..e790258a1a --- /dev/null +++ b/Documentation/technical/parallel-checkout.txt @@ -0,0 +1,270 @@ +Parallel Checkout Design Notes +============================== + +The "Parallel Checkout" feature attempts to use multiple processes to +parallelize the work of uncompressing the blobs, applying in-core +filters, and writing the resulting contents to the working tree during a +checkout operation. It can be used by all checkout-related commands, +such as `clone`, `checkout`, `reset`, `sparse-checkout`, and others. + +These commands share the following basic structure: + +* Step 1: Read the current index file into memory. + +* Step 2: Modify the in-memory index based upon the command, and + temporarily mark all cache entries that need to be updated. + +* Step 3: Populate the working tree to match the new candidate index. + This includes iterating over all of the to-be-updated cache entries + and delete, create, or overwrite the associated files in the working + tree. + +* Step 4: Write the new index to disk. + +Step 3 is the focus of the "parallel checkout" effort described here. + +Sequential Implementation +------------------------- + +For the purposes of discussion here, the current sequential +implementation of Step 3 is divided in 3 parts, each one implemented in +its own function: + +* Step 3a: `unpack-trees.c:check_updates()` contains a series of + sequential loops iterating over the `cache_entry`'s array. The main + loop in this function calls the Step 3b function for each of the + to-be-updated entries. + +* Step 3b: `entry.c:checkout_entry()` examines the existing working tree + for file conflicts, collisions, and unsaved changes. It removes files + and creates leading directories as necessary. It calls the Step 3c + function for each entry to be written. + +* Step 3c: `entry.c:write_entry()` loads the blob into memory, smudges + it if necessary, creates the file in the working tree, writes the + smudged contents, calls `fstat()` or `lstat()`, and updates the + associated `cache_entry` struct with the stat information gathered. + +It wouldn't be safe to perform Step 3b in parallel, as there could be +race conditions between file creations and removals. Instead, the +parallel checkout framework lets the sequential code handle Step 3b, +and uses parallel workers to replace the sequential +`entry.c:write_entry()` calls from Step 3c. + +Rejected Multi-Threaded Solution +-------------------------------- + +The most "straightforward" implementation would be to spread the set of +to-be-updated cache entries across multiple threads. But due to the +thread-unsafe functions in the ODB code, we would have to use locks to +coordinate the parallel operation. An early prototype of this solution +showed that the multi-threaded checkout would bring performance +improvements over the sequential code, but there was still too much lock +contention. A `perf` profiling indicated that around 20% of the runtime +during a local Linux clone (on an SSD) was spent in locking functions. +For this reason this approach was rejected in favor of using multiple +child processes, which led to a better performance. + +Multi-Process Solution +---------------------- + +Parallel checkout alters the aforementioned Step 3 to use multiple +`checkout--worker` background processes to distribute the work. The +long-running worker processes are controlled by the foreground Git +command using the existing run-command API. + +Overview +~~~~~~~~ + +Step 3b is only slightly altered; for each entry to be checked out, the +main process performs the following steps: + +* M1: Check whether there is any untracked or unclean file in the + working tree which would be overwritten by this entry, and decide + whether to proceed (removing the file(s)) or not. + +* M2: Create the leading directories. + +* M3: Load the conversion attributes for the entry's path. + +* M4: Check, based on the entry's type and conversion attributes, + whether the entry is eligible for parallel checkout (more on this + later). If it is eligible, enqueue the entry and the loaded + attributes to later write the entry in parallel. If not, write the + entry right away, using the default sequential code. + +Note: we save the conversion attributes associated with each entry +because the workers don't have access to the main process' index state, +so they can't load the attributes by themselves (and the attributes are +needed to properly smudge the entry). Additionally, this has a positive +impact on performance as (1) we don't need to load the attributes twice +and (2) the attributes machinery is optimized to handle paths in +sequential order. + +After all entries have passed through the above steps, the main process +checks if the number of enqueued entries is sufficient to spread among +the workers. If not, it just writes them sequentially. Otherwise, it +spawns the workers and distributes the queued entries uniformly in +continuous chunks. This aims to minimize the chances of two workers +writing to the same directory simultaneously, which could increase lock +contention in the kernel. + +Then, for each assigned item, each worker: + +* W1: Checks if there is any non-directory file in the leading part of + the entry's path or if there already exists a file at the entry' path. + If so, mark the entry with `PC_ITEM_COLLIDED` and skip it (more on + this later). + +* W2: Creates the file (with O_CREAT and O_EXCL). + +* W3: Loads the blob into memory (inflating and delta reconstructing + it). + +* W4: Applies any required in-process filter, like end-of-line + conversion and re-encoding. + +* W5: Writes the result to the file descriptor opened at W2. + +* W6: Calls `fstat()` or lstat()` on the just-written path, and sends + the result back to the main process, together with the end status of + the operation and the item's identification number. + +Note that, when possible, steps W3 to W5 are delegated to the streaming +machinery, removing the need to keep the entire blob in memory. + +If the worker fails to read the blob or to write it to the working tree, +it removes the created file to avoid leaving empty files behind. This is +the *only* time a worker is allowed to remove a file. + +As mentioned earlier, it is the responsibility of the main process to +remove any file that blocks the checkout operation (or abort if the +removal(s) would cause data loss and the user didn't ask to `--force`). +This is crucial to avoid race conditions and also to properly detect +path collisions at Step W1. + +After the workers finish writing the items and sending back the required +information, the main process handles the results in two steps: + +- First, it updates the in-memory index with the `lstat()` information + sent by the workers. (This must be done first as this information + might me required in the following step.) + +- Then it writes the items which collided on disk (i.e. items marked + with `PC_ITEM_COLLIDED`). More on this below. + +Path Collisions +--------------- + +Path collisions happen when two different paths correspond to the same +entry in the file system. E.g. the paths 'a' and 'A' would collide in a +case-insensitive file system. + +The sequential checkout deals with collisions in the same way that it +deals with files that were already present in the working tree before +checkout. Basically, it checks if the path that it wants to write +already exists on disk, makes sure the existing file doesn't have +unsaved data, and then overwrites it. (To be more pedantic: it deletes +the existing file and creates the new one.) So, if there are multiple +colliding files to be checked out, the sequential code will write each +one of them but only the last will actually survive on disk. + +Parallel checkout aims to reproduce the same behavior. However, we +cannot let the workers racily write to the same file on disk. Instead, +the workers detect when the entry that they want to check out would +collide with an existing file, and mark it with `PC_ITEM_COLLIDED`. +Later, the main process can sequentially feed these entries back to +`checkout_entry()` without the risk of race conditions. On clone, this +also has the effect of marking the colliding entries to later emit a +warning for the user, like the classic sequential checkout does. + +The workers are able to detect both collisions among the entries being +concurrently written and collisions between a parallel-eligible entry +and an ineligible entry. The general idea for collision detection is +quite straightforward: for each parallel-eligible entry, the main +process must remove all files that prevent this entry from being written +(before enqueueing it). This includes any non-directory file in the +leading path of the entry. Later, when a worker gets assigned the entry, +it looks again for the non-directories files and for an already existing +file at the entry's path. If any of these checks finds something, the +worker knows that there was a path collision. + +Because parallel checkout can distinguish path collisions from the case +where the file was already present in the working tree before checkout, +we could alternatively choose to skip the checkout of colliding entries. +However, each entry that doesn't get written would have NULL `lstat()` +fields on the index. This could cause performance penalties for +subsequent commands that need to refresh the index, as they would have +to go to the file system to see if the entry is dirty. Thus, if we have +N entries in a colliding group and we decide to write and `lstat()` only +one of them, every subsequent `git-status` will have to read, convert, +and hash the written file N - 1 times. By checking out all colliding +entries (like the sequential code does), we only pay the overhead once, +during checkout. + +Eligible Entries for Parallel Checkout +-------------------------------------- + +As previously mentioned, not all entries passed to `checkout_entry()` +will be considered eligible for parallel checkout. More specifically, we +exclude: + +- Symbolic links; to avoid race conditions that, in combination with + path collisions, could cause workers to write files at the wrong + place. For example, if we were to concurrently check out a symlink + 'a' -> 'b' and a regular file 'A/f' in a case-insensitive file system, + we could potentially end up writing the file 'A/f' at 'a/f', due to a + race condition. + +- Regular files that require external filters (either "one shot" filters + or long-running process filters). These filters are black-boxes to Git + and may have their own internal locking or non-concurrent assumptions. + So it might not be safe to run multiple instances in parallel. ++ +Besides, long-running filters may use the delayed checkout feature to +postpone the return of some filtered blobs. The delayed checkout queue +and the parallel checkout queue are not compatible and should remain +separate. ++ +Note: regular files that only require internal filters, like end-of-line +conversion and re-encoding, are eligible for parallel checkout. + +Ineligible entries are checked out by the classic sequential codepath +*before* spawning workers. + +Note: submodules's files are also eligible for parallel checkout (as +long as they don't fall into any of the excluding categories mentioned +above). But since each submodule is checked out in its own child +process, we don't mix the superproject's and the submodules' files in +the same parallel checkout process or queue. + +The API +------- + +The parallel checkout API was designed with the goal of minimizing +changes to the current users of the checkout machinery. This means that +they don't have to call a different function for sequential or parallel +checkout. As already mentioned, `checkout_entry()` will automatically +insert the given entry in the parallel checkout queue when this feature +is enabled and the entry is eligible; otherwise, it will just write the +entry right away, using the sequential code. In general, callers of the +parallel checkout API should look similar to this: + +---------------------------------------------- +int pc_workers, pc_threshold, err = 0; +struct checkout state; + +get_parallel_checkout_configs(&pc_workers, &pc_threshold); + +/* + * This check is not strictly required, but it + * should save some time in sequential mode. + */ +if (pc_workers > 1) + init_parallel_checkout(); + +for (each cache_entry ce to-be-updated) + err |= checkout_entry(ce, &state, NULL, NULL); + +err |= run_parallel_checkout(&state, pc_workers, pc_threshold, NULL, NULL); +---------------------------------------------- diff --git a/Documentation/technical/partial-clone.txt b/Documentation/technical/partial-clone.txt index 0780d30cac..a0dd7c66f2 100644 --- a/Documentation/technical/partial-clone.txt +++ b/Documentation/technical/partial-clone.txt @@ -242,8 +242,7 @@ remote in a specific order. repository and can satisfy all such requests. - Repack essentially treats promisor and non-promisor packfiles as 2 - distinct partitions and does not mix them. Repack currently only works - on non-promisor packfiles and loose objects. + distinct partitions and does not mix them. - Dynamic object fetching invokes fetch-pack once *for each item* because most algorithms stumble upon a missing object and need to have @@ -273,9 +272,6 @@ to use those promisor remotes in that order." The user might want to work in a triangular work flow with multiple promisor remotes that each have an incomplete view of the repository. -- Allow repack to work on promisor packfiles (while keeping them distinct - from non-promisor packfiles). - - Allow non-pathname-based filters to make use of packfile bitmaps (when present). This was just an omission during the initial implementation. diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index f4ed141774..1040d85319 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -346,6 +346,14 @@ explained below. client should download from all given URIs. Currently, the protocols supported are "http" and "https". +If the 'wait-for-done' feature is advertised, the following argument +can be included in the client's request. + + wait-for-done + Indicates to the server that it should never send "ready", but + should wait for the client to say "done" before sending the + packfile. + The response of `fetch` is broken into a number of sections separated by delimiter packets (0001), with each section beginning with its section header. Most sections are sent only when the packfile is sent. @@ -532,7 +540,7 @@ An `object-info` request takes the following arguments: Indicates to the server an object which the client wants to obtain information for. -The response of `object-info` is a list of the the requested object ids +The response of `object-info` is a list of the requested object ids and associated requested information, each separated by a single space. output = info flush-pkt diff --git a/Documentation/technical/reftable.txt b/Documentation/technical/reftable.txt index 3ef169af27..d7c3b645cf 100644 --- a/Documentation/technical/reftable.txt +++ b/Documentation/technical/reftable.txt @@ -1011,8 +1011,13 @@ reftable stack, reload `tables.list`, and delete any tables no longer mentioned in `tables.list`. Irregular program exit may still leave about unused files. In this case, a -cleanup operation can read `tables.list`, note its modification timestamp, and -delete any unreferenced `*.ref` files that are older. +cleanup operation should proceed as follows: + +* take a lock `tables.list.lock` to prevent concurrent modifications +* refresh the reftable stack, by reading `tables.list` +* for each `*.ref` file, remove it if +** it is not mentioned in `tables.list`, and +** its max update_index is not beyond the max update_index of the stack Alternatives considered diff --git a/Documentation/technical/remembering-renames.txt b/Documentation/technical/remembering-renames.txt new file mode 100644 index 0000000000..2fd5cc88e0 --- /dev/null +++ b/Documentation/technical/remembering-renames.txt @@ -0,0 +1,671 @@ +Rebases and cherry-picks involve a sequence of merges whose results are +recorded as new single-parent commits. The first parent side of those +merges represent the "upstream" side, and often include a far larger set of +changes than the second parent side. Traditionally, the renames on the +first-parent side of that sequence of merges were repeatedly re-detected +for every merge. This file explains why it is safe and effective during +rebases and cherry-picks to remember renames on the upstream side of +history as an optimization, assuming all merges are automatic and clean +(i.e. no conflicts and not interrupted for user input or editing). + +Outline: + + 0. Assumptions + + 1. How rebasing and cherry-picking work + + 2. Why the renames on MERGE_SIDE1 in any given pick are *always* a + superset of the renames on MERGE_SIDE1 for the next pick. + + 3. Why any rename on MERGE_SIDE1 in any given pick is _almost_ always also + a rename on MERGE_SIDE1 for the next pick + + 4. A detailed description of the the counter-examples to #3. + + 5. Why the special cases in #4 are still fully reasonable to use to pair + up files for three-way content merging in the merge machinery, and why + they do not affect the correctness of the merge. + + 6. Interaction with skipping of "irrelevant" renames + + 7. Additional items that need to be cached + + 8. How directory rename detection interacts with the above and why this + optimization is still safe even if merge.directoryRenames is set to + "true". + + +=== 0. Assumptions === + +There are two assumptions that will hold throughout this document: + + * The upstream side where commits are transplanted to is treated as the + first parent side when rebase/cherry-pick call the merge machinery + + * All merges are fully automatic + +and a third that will hold in sections 2-5 for simplicity, that I'll later +address in section 8: + + * No directory renames occur + + +Let me explain more about each assumption and why I include it: + + +The first assumption is merely for the purposes of making this document +clearer; the optimization implementation does not actually depend upon it. +However, the assumption does hold in all cases because it reflects the way +that both rebase and cherry-pick were implemented; and the implementation +of cherry-pick and rebase are not readily changeable for backwards +compatibility reasons (see for example the discussion of the --ours and +--theirs flag in the documentation of `git checkout`, particularly the +comments about how they behave with rebase). The optimization avoids +checking first-parent-ness, though. It checks the conditions that make the +optimization valid instead, so it would still continue working if someone +changed the parent ordering that cherry-pick and rebase use. But making +this assumption does make this document much clearer and prevents me from +having to repeat every example twice. + +If the second assumption is violated, then the optimization simply is +turned off and thus isn't relevant to consider. The second assumption can +also be stated as "there is no interruption for a user to resolve conflicts +or to just further edit or tweak files". While real rebases and +cherry-picks are often interrupted (either because it's an interactive +rebase where the user requested to stop and edit, or because there were +conflicts that the user needs to resolve), the cache of renames is not +stored on disk, and thus is thrown away as soon as the rebase or cherry +pick stops for the user to resolve the operation. + +The third assumption makes sections 2-5 simpler, and allows people to +understand the basics of why this optimization is safe and effective, and +then I can go back and address the specifics in section 8. It is probably +also worth noting that if directory renames do occur, then the default of +merge.directoryRenames being set to "conflict" means that the operation +will stop for users to resolve the conflicts and the cache will be thrown +away, and thus that there won't be an optimization to apply. So, the only +reason we need to address directory renames specifically, is that some +users will have set merge.directoryRenames to "true" to allow the merges to +continue to proceed automatically. The optimization is still safe with +this config setting, but we have to discuss a few more cases to show why; +this discussion is deferred until section 8. + + +=== 1. How rebasing and cherry-picking work === + +Consider the following setup (from the git-rebase manpage): + + A---B---C topic + / + D---E---F---G main + +After rebasing or cherry-picking topic onto main, this will appear as: + + A'--B'--C' topic + / + D---E---F---G main + +The way the commits A', B', and C' are created is through a series of +merges, where rebase or cherry-pick sequentially uses each of the three +A-B-C commits in a special merge operation. Let's label the three commits +in the merge operation as MERGE_BASE, MERGE_SIDE1, and MERGE_SIDE2. For +this picture, the three commits for each of the three merges would be: + +To create A': + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + +To create B': + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + +To create C': + MERGE_BASE: B + MERGE_SIDE1: B' + MERGE_SIDE2: C + +Sometimes, folks are surprised that these three-way merges are done. It +can be useful in understanding these three-way merges to view them in a +slightly different light. For example, in creating C', you can view it as +either: + + * Apply the changes between B & C to B' + * Apply the changes between B & B' to C + +Conceptually the two statements above are the same as a three-way merge of +B, B', and C, at least the parts before you decide to record a commit. + + +=== 2. Why the renames on MERGE_SIDE1 in any given pick are always a === +=== superset of the renames on MERGE_SIDE1 for the next pick. === + +The merge machinery uses the filenames it is fed from MERGE_BASE, +MERGE_SIDE1, and MERGE_SIDE2. It will only move content to a different +filename under one of three conditions: + + * To make both pieces of a conflict available to a user during conflict + resolution (examples: directory/file conflict, add/add type conflict + such as symlink vs. regular file) + + * When MERGE_SIDE1 renames the file. + + * When MERGE_SIDE2 renames the file. + +First, let's remember what commits are involved in the first and second +picks of the cherry-pick or rebase sequence: + +To create A': + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + +To create B': + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + +So, in particular, we need to show that the renames between E and G are a +superset of those between A and A'. + +A' is created by the first merge. A' will only have renames for one of the +three reasons listed above. The first case, a conflict, results in a +situation where the cache is dropped and thus this optimization doesn't +take effect, so we need not consider that case. The third case, a rename +on MERGE_SIDE2 (i.e. from G to A), will show up in A' but it also shows up +in A -- therefore when diffing A and A' that path does not show up as a +rename. The only remaining way for renames to show up in A' is for the +rename to come from MERGE_SIDE1. Therefore, all renames between A and A' +are a subset of those between E and G. Equivalently, all renames between E +and G are a superset of those between A and A'. + + +=== 3. Why any rename on MERGE_SIDE1 in any given pick is _almost_ === +=== always also a rename on MERGE_SIDE1 for the next pick. === + +Let's again look at the first two picks: + +To create A': + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + +To create B': + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + +Now let's look at any given rename from MERGE_SIDE1 of the first pick, i.e. +any given rename from E to G. Let's use the filenames 'oldfile' and +'newfile' for demonstration purposes. That first pick will function as +follows; when the rename is detected, the merge machinery will do a +three-way content merge of the following: + E:oldfile + G:newfile + A:oldfile +and produce a new result: + A':newfile + +Note above that I've assumed that E->A did not rename oldfile. If that +side did rename, then we most likely have a rename/rename(1to2) conflict +that will cause the rebase or cherry-pick operation to halt and drop the +in-memory cache of renames and thus doesn't need to be considered further. +In the special case that E->A does rename the file but also renames it to +newfile, then there is no conflict from the renaming and the merge can +succeed. In this special case, the rename is not valid to cache because +the second merge will find A:newfile in the MERGE_BASE (see also the new +testcases in t6429 with "rename same file identically" in their +description). So a rename/rename(1to1) needs to be specially handled by +pruning renames from the cache and decrementing the dir_rename_counts in +the current and leading directories associated with those renames. Or, +since these are really rare, one could just take the easy way out and +disable the remembering renames optimization when a rename/rename(1to1) +happens. + +The previous paragraph handled the cases for E->A renaming oldfile, let's +continue assuming that oldfile is not renamed in A. + +As per the diagram for creating B', MERGE_SIDE1 involves the changes from A +to A'. So, we are curious whether A:oldfile and A':newfile will be viewed +as renames. Note that: + + * There will be no A':oldfile (because there could not have been a + G:oldfile as we do not do break detection in the merge machinery and + G:newfile was detected as a rename, and by the construction of the + rename above that merged cleanly, the merge machinery will ensure there + is no 'oldfile' in the result). + + * There will be no A:newfile (if there had been, we would have had a + rename/add conflict). + + * Clearly A:oldfile and A':newfile are "related" (A':newfile came from a + clean three-way content merge involving A:oldfile). + +We can also expound on the third point above, by noting that three-way +content merges can also be viewed as applying the differences between the +base and one side to the other side. Thus we can view A':newfile as +having been created by taking the changes between E:oldfile and G:newfile +(which were detected as being related, i.e. <50% changed) to A:oldfile. + +Thus A:oldfile and A':newfile are just as related as E:oldfile and +G:newfile are -- they have exactly identical differences. Since the latter +were detected as renames, A:oldfile and A':newfile should also be +detectable as renames almost always. + + +=== 4. A detailed description of the counter-examples to #3. === + +We already noted in section 3 that rename/rename(1to1) (i.e. both sides +renaming a file the same way) was one counter-example. The more +interesting bit, though, is why did we need to use the "almost" qualifier +when stating that A:oldfile and A':newfile are "almost" always detectable +as renames? + +Let's repeat an earlier point that section 3 made: + + A':newfile was created by applying the changes between E:oldfile and + G:newfile to A:oldfile. The changes between E:oldfile and G:newfile were + <50% of the size of E:oldfile. + +If those changes that were <50% of the size of E:oldfile are also <50% of +the size of A:oldfile, then A:oldfile and A':newfile will be detectable as +renames. However, if there is a dramatic size reduction between E:oldfile +and A:oldfile (but the changes between E:oldfile, G:newfile, and A:oldfile +still somehow merge cleanly), then traditional rename detection would not +detect A:oldfile and A':newfile as renames. + +Here's an example where that can happen: + * E:oldfile had 20 lines + * G:newfile added 10 new lines at the beginning of the file + * A:oldfile kept the first 3 lines of the file, and deleted all the rest +then + => A':newfile would have 13 lines, 3 of which matches those in A:oldfile. +E:oldfile -> G:newfile would be detected as a rename, but A:oldfile and +A':newfile would not be. + + +=== 5. Why the special cases in #4 are still fully reasonable to use to === +=== pair up files for three-way content merging in the merge machinery, === +=== and why they do not affect the correctness of the merge. === + +In the rename/rename(1to1) case, A:newfile and A':newfile are not renames +since they use the *same* filename. However, files with the same filename +are obviously fine to pair up for three-way content merging (the merge +machinery has never employed break detection). The interesting +counter-example case is thus not the rename/rename(1to1) case, but the case +where A did not rename oldfile. That was the case that we spent most of +the time discussing in sections 3 and 4. The remainder of this section +will be devoted to that case as well. + +So, even if A:oldfile and A':newfile aren't detectable as renames, why is +it still reasonable to pair them up for three-way content merging in the +merge machinery? There are multiple reasons: + + * As noted in sections 3 and 4, the diff between A:oldfile and A':newfile + is *exactly* the same as the diff between E:oldfile and G:newfile. The + latter pair were detected as renames, so it seems unlikely to surprise + users for us to treat A:oldfile and A':newfile as renames. + + * In fact, "oldfile" and "newfile" were at one point detected as renames + due to how they were constructed in the E..G chain. And we used that + information once already in this rebase/cherry-pick. I think users + would be unlikely to be surprised at us continuing to treat the files + as renames and would quickly understand why we had done so. + + * Marking or declaring files as renames is *not* the end goal for merges. + Merges use renames to determine which files make sense to be paired up + for three-way content merges. + + * A:oldfile and A':newfile were _already_ paired up in a three-way + content merge; that is how A':newfile was created. In fact, that + three-way content merge was clean. So using them again in a later + three-way content merge seems very reasonable. + +However, the above is focusing on the common scenarios. Let's try to look +at all possible unusual scenarios and compare without the optimization to +with the optimization. Consider the following theoretical cases; we will +then dive into each to determine which of them are possible, +and if so, what they mean: + + 1. Without the optimization, the second merge results in a conflict. + With the optimization, the second merge also results in a conflict. + Questions: Are the conflicts confusingly different? Better in one case? + + 2. Without the optimization, the second merge results in NO conflict. + With the optimization, the second merge also results in NO conflict. + Questions: Are the merges the same? + + 3. Without the optimization, the second merge results in a conflict. + With the optimization, the second merge results in NO conflict. + Questions: Possible? Bug, bugfix, or something else? + + 4. Without the optimization, the second merge results in NO conflict. + With the optimization, the second merge results in a conflict. + Questions: Possible? Bug, bugfix, or something else? + +I'll consider all four cases, but out of order. + +The fourth case is impossible. For the code without the remembering +renames optimization to not get a conflict, B:oldfile would need to exactly +match A:oldfile -- if it doesn't, there would be a modify/delete conflict. +If A:oldfile matches B:oldfile exactly, then a three-way content merge +between A:oldfile, A':newfile, and B:oldfile would have no conflict and +just give us the version of newfile from A' as the result. + +From the same logic as the above paragraph, the second case would indeed +result in identical merges. When A:oldfile exactly matches B:oldfile, an +undetected rename would say, "Oh, I see one side didn't modify 'oldfile' +and the other side deleted it. I'll delete it. And I see you have this +brand new file named 'newfile' in A', so I'll keep it." That gives the +same results as three-way content merging A:oldfile, A':newfile, and +B:oldfile -- a removal of oldfile with the version of newfile from A' +showing up in the result. + +The third case is interesting. It means that A:oldfile and A':newfile were +not just similar enough, but that the changes between them did not conflict +with the changes between A:oldfile and B:oldfile. This would validate our +hunch that the files were similar enough to be used in a three-way content +merge, and thus seems entirely correct for us to have used them that way. +(Sidenote: One particular example here may be enlightening. Let's say that +B was an immediate revert of A. B clearly would have been a clean revert +of A, since A was B's immediate parent. One would assume that if you can +pick a commit, you should also be able to cherry-pick its immediate revert. +However, this is one of those funny corner cases; without this +optimization, we just successfully picked a commit cleanly, but we are +unable to cherry-pick its immediate revert due to the size differences +between E:oldfile and A:oldfile.) + +That leaves only the first case to consider -- when we get conflicts both +with or without the optimization. Without the optimization, we'll have a +modify/delete conflict, where both A':newfile and B:oldfile are left in the +tree for the user to deal with and no hints about the potential similarity +between the two. With the optimization, we'll have a three-way content +merged A:oldfile, A':newfile, and B:oldfile with conflict markers +suggesting we thought the files were related but giving the user the chance +to resolve. As noted above, I don't think users will find us treating +'oldfile' and 'newfile' as related as a surprise since they were between E +and G. In any event, though, this case shouldn't be concerning since we +hit a conflict in both cases, told the user what we know, and asked them to +resolve it. + +So, in summary, case 4 is impossible, case 2 yields the same behavior, and +cases 1 and 3 seem to provide as good or better behavior with the +optimization than without. + + +=== 6. Interaction with skipping of "irrelevant" renames === + +Previous optimizations involved skipping rename detection for paths +considered to be "irrelevant". See for example the following commits: + + * 32a56dfb99 ("merge-ort: precompute subset of sources for which we + need rename detection", 2021-03-11) + * 2fd9eda462 ("merge-ort: precompute whether directory rename + detection is needed", 2021-03-11) + * 9bd342137e ("diffcore-rename: determine which relevant_sources are + no longer relevant", 2021-03-13) + +Relevance is always determined by what the _other_ side of history has +done, in terms of modifing a file that our side renamed, or adding a +file to a directory which our side renamed. This means that a path +that is "irrelevant" when picking the first commit of a series in a +rebase or cherry-pick, may suddenly become "relevant" when picking the +next commit. + +The upshot of this is that we can only cache rename detection results +for relevant paths, and need to re-check relevance in subsequent +commits. If those subsequent commits have additional paths that are +relevant for rename detection, then we will need to redo rename +detection -- though we can limit it to the paths for which we have not +already detected renames. + + +=== 7. Additional items that need to be cached === + +It turns out we have to cache more than just renames; we also cache: + + A) non-renames (i.e. unpaired deletes) + B) counts of renames within directories + C) sources that were marked as RELEVANT_LOCATION, but which were + downgraded to RELEVANT_NO_MORE + D) the toplevel trees involved in the merge + +These are all stored in struct rename_info, and respectively appear in + * cached_pairs (along side actual renames, just with a value of NULL) + * dir_rename_counts + * cached_irrelevant + * merge_trees + +The reason for (A) comes from the irrelevant renames skipping +optimization discussed in section 6. The fact that irrelevant renames +are skipped means we only get a subset of the potential renames +detected and subsequent commits may need to run rename detection on +the upstream side on a subset of the remaining renames (to get the +renames that are relevant for that later commit). Since unpaired +deletes are involved in rename detection too, we don't want to +repeatedly check that those paths remain unpaired on the upstream side +with every commit we are transplanting. + +The reason for (B) is that diffcore_rename_extended() is what +generates the counts of renames by directory which is needed in +directory rename detection, and if we don't run +diffcore_rename_extended() again then we need to have the output from +it, including dir_rename_counts, from the previous run. + +The reason for (C) is that merge-ort's tree traversal will again think +those paths are relevant (marking them as RELEVANT_LOCATION), but the +fact that they were downgraded to RELEVANT_NO_MORE means that +dir_rename_counts already has the information we need for directory +rename detection. (A path which becomes RELEVANT_CONTENT in a +subsequent commit will be removed from cached_irrelevant.) + +The reason for (D) is that is how we determine whether the remember +renames optimization can be used. In particular, remembering that our +sequence of merges looks like: + + Merge 1: + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + => Creates A' + + Merge 2: + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + => Creates B' + +It is the fact that the trees A and A' appear both in Merge 1 and in +Merge 2, with A as a parent of A' that allows this optimization. So +we store the trees to compare with what we are asked to merge next +time. + + +=== 8. How directory rename detection interacts with the above and === +=== why this optimization is still safe even if === +=== merge.directoryRenames is set to "true". === + +As noted in the assumptions section: + + """ + ...if directory renames do occur, then the default of + merge.directoryRenames being set to "conflict" means that the operation + will stop for users to resolve the conflicts and the cache will be + thrown away, and thus that there won't be an optimization to apply. + So, the only reason we need to address directory renames specifically, + is that some users will have set merge.directoryRenames to "true" to + allow the merges to continue to proceed automatically. + """ + +Let's remember that we need to look at how any given pick affects the next +one. So let's again use the first two picks from the diagram in section +one: + + First pick does this three-way merge: + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + => creates A' + + Second pick does this three-way merge: + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + => creates B' + +Now, directory rename detection exists so that if one side of history +renames a directory, and the other side adds a new file to the old +directory, then the merge (with merge.directoryRenames=true) can move the +file into the new directory. There are two qualitatively different ways to +add a new file to an old directory: create a new file, or rename a file +into that directory. Also, directory renames can be done on either side of +history, so there are four cases to consider: + + * MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to old dir + * MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames file into old dir + * MERGE_SIDE1 adds new file to old dir, MERGE_SIDE2 renames old dir + * MERGE_SIDE1 renames file into old dir, MERGE_SIDE2 renames old dir + +One last note before we consider these four cases: There are some +important properties about how we implement this optimization with +respect to directory rename detection that we need to bear in mind +while considering all of these cases: + + * rename caching occurs *after* applying directory renames + + * a rename created by directory rename detection is recorded for the side + of history that did the directory rename. + + * dir_rename_counts, the nested map of + {oldname => {newname => count}}, + is cached between runs as well. This basically means that directory + rename detection is also cached, though only on the side of history + that we cache renames for (MERGE_SIDE1 as far as this document is + concerned; see the assumptions section). Two interesting sub-notes + about these counts: + + * If we need to perform rename-detection again on the given side (e.g. + some paths are relevant for rename detection that weren't before), + then we clear dir_rename_counts and recompute it, making use of + cached_pairs. The reason it is important to do this is optimizations + around RELEVANT_LOCATION exist to prevent us from computing + unnecessary renames for directory rename detection and from computing + dir_rename_counts for irrelevant directories; but those same renames + or directories may become necessary for subsequent merges. The + easiest way to "fix up" dir_rename_counts in such cases is to just + recompute it. + + * If we prune rename/rename(1to1) entries from the cache, then we also + need to update dir_rename_counts to decrement the counts for the + involved directory and any relevant parent directories (to undo what + update_dir_rename_counts() in diffcore-rename.c incremented when the + rename was initially found). If we instead just disable the + remembering renames optimization when the exceedingly rare + rename/rename(1to1) cases occur, then dir_rename_counts will get + re-computed the next time rename detection occurs, as noted above. + + * the side with multiple commits to pick, is the side of history that we + do NOT cache renames for. Thus, there are no additional commits to + change the number of renames in a directory, except for those done by + directory rename detection (which always pad the majority). + + * the "renames" we cache are modified slightly by any directory rename, + as noted below. + +Now, with those notes out of the way, let's go through the four cases +in order: + +Case 1: MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to old dir + + This case looks like this: + + MERGE_BASE: E, Has olddir/ + MERGE_SIDE1: G, Renames olddir/ -> newdir/ + MERGE_SIDE2: A, Adds olddir/newfile + => creates A', With newdir/newfile + + MERGE_BASE: A, Has olddir/newfile + MERGE_SIDE1: A', Has newdir/newfile + MERGE_SIDE2: B, Modifies olddir/newfile + => expected B', with threeway-merged newdir/newfile from above + + In this case, with the optimization, note that after the first commit: + * MERGE_SIDE1 remembers olddir/ -> newdir/ + * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile + Given the cached rename noted above, the second merge can proceed as + expected without needing to perform rename detection from A -> A'. + +Case 2: MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames file into old dir + + This case looks like this: + MERGE_BASE: E oldfile, olddir/ + MERGE_SIDE1: G oldfile, olddir/ -> newdir/ + MERGE_SIDE2: A oldfile -> olddir/newfile + => creates A', With newdir/newfile representing original oldfile + + MERGE_BASE: A olddir/newfile + MERGE_SIDE1: A' newdir/newfile + MERGE_SIDE2: B modify olddir/newfile + => expected B', with threeway-merged newdir/newfile from above + + In this case, with the optimization, note that after the first commit: + * MERGE_SIDE1 remembers olddir/ -> newdir/ + * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile + (NOT oldfile -> newdir/newfile; compare to case with + (p->status == 'R' && new_path) in possibly_cache_new_pair()) + + Given the cached rename noted above, the second merge can proceed as + expected without needing to perform rename detection from A -> A'. + +Case 3: MERGE_SIDE1 adds new file to old dir, MERGE_SIDE2 renames old dir + + This case looks like this: + + MERGE_BASE: E, Has olddir/ + MERGE_SIDE1: G, Adds olddir/newfile + MERGE_SIDE2: A, Renames olddir/ -> newdir/ + => creates A', With newdir/newfile + + MERGE_BASE: A, Has newdir/, but no notion of newdir/newfile + MERGE_SIDE1: A', Has newdir/newfile + MERGE_SIDE2: B, Has newdir/, but no notion of newdir/newfile + => expected B', with newdir/newfile from A' + + In this case, with the optimization, note that after the first commit there + were no renames on MERGE_SIDE1, and any renames on MERGE_SIDE2 are tossed. + But the second merge didn't need any renames so this is fine. + +Case 4: MERGE_SIDE1 renames file into old dir, MERGE_SIDE2 renames old dir + + This case looks like this: + + MERGE_BASE: E, Has olddir/ + MERGE_SIDE1: G, Renames oldfile -> olddir/newfile + MERGE_SIDE2: A, Renames olddir/ -> newdir/ + => creates A', With newdir/newfile representing original oldfile + + MERGE_BASE: A, Has oldfile + MERGE_SIDE1: A', Has newdir/newfile + MERGE_SIDE2: B, Modifies oldfile + => expected B', with threeway-merged newdir/newfile from above + + In this case, with the optimization, note that after the first commit: + * MERGE_SIDE1 remembers oldfile -> newdir/newfile + (NOT oldfile -> olddir/newfile; compare to case of second + block under p->status == 'R' in possibly_cache_new_pair()) + * MERGE_SIDE2 renames are tossed because only MERGE_SIDE1 is remembered + + Given the cached rename noted above, the second merge can proceed as + expected without needing to perform rename detection from A -> A'. + +Finally, I'll just note here that interactions with the +skip-irrelevant-renames optimization means we sometimes don't detect +renames for any files within a directory that was renamed, in which +case we will not have been able to detect any rename for the directory +itself. In such a case, we do not know whether the directory was +renamed; we want to be careful to avoid cacheing some kind of "this +directory was not renamed" statement. If we did, then a subsequent +commit being rebased could add a file to the old directory, and the +user would expect it to end up in the correct directory -- something +our erroneous "this directory was not renamed" cache would preclude. diff --git a/Documentation/technical/sparse-index.txt b/Documentation/technical/sparse-index.txt new file mode 100644 index 0000000000..3b24c1a219 --- /dev/null +++ b/Documentation/technical/sparse-index.txt @@ -0,0 +1,208 @@ +Git Sparse-Index Design Document +================================ + +The sparse-checkout feature allows users to focus a working directory on +a subset of the files at HEAD. The cone mode patterns, enabled by +`core.sparseCheckoutCone`, allow for very fast pattern matching to +discover which files at HEAD belong in the sparse-checkout cone. + +Three important scale dimensions for a Git working directory are: + +* `HEAD`: How many files are present at `HEAD`? + +* Populated: How many files are within the sparse-checkout cone. + +* Modified: How many files has the user modified in the working directory? + +We will use big-O notation -- O(X) -- to denote how expensive certain +operations are in terms of these dimensions. + +These dimensions are ordered by their magnitude: users (typically) modify +fewer files than are populated, and we can only populate files at `HEAD`. + +Problems occur if there is an extreme imbalance in these dimensions. For +example, if `HEAD` contains millions of paths but the populated set has +only tens of thousands, then commands like `git status` and `git add` can +be dominated by operations that require O(`HEAD`) operations instead of +O(Populated). Primarily, the cost is in parsing and rewriting the index, +which is filled primarily with files at `HEAD` that are marked with the +`SKIP_WORKTREE` bit. + +The sparse-index intends to take these commands that read and modify the +index from O(`HEAD`) to O(Populated). To do this, we need to modify the +index format in a significant way: add "sparse directory" entries. + +With cone mode patterns, it is possible to detect when an entire +directory will have its contents outside of the sparse-checkout definition. +Instead of listing all of the files it contains as individual entries, a +sparse-index contains an entry with the directory name, referencing the +object ID of the tree at `HEAD` and marked with the `SKIP_WORKTREE` bit. +If we need to discover the details for paths within that directory, we +can parse trees to find that list. + +At time of writing, sparse-directory entries violate expectations about the +index format and its in-memory data structure. There are many consumers in +the codebase that expect to iterate through all of the index entries and +see only files. In fact, these loops expect to see a reference to every +staged file. One way to handle this is to parse trees to replace a +sparse-directory entry with all of the files within that tree as the index +is loaded. However, parsing trees is slower than parsing the index format, +so that is a slower operation than if we left the index alone. The plan is +to make all of these integrations "sparse aware" so this expansion through +tree parsing is unnecessary and they use fewer resources than when using a +full index. + +The implementation plan below follows four phases to slowly integrate with +the sparse-index. The intention is to incrementally update Git commands to +interact safely with the sparse-index without significant slowdowns. This +may not always be possible, but the hope is that the primary commands that +users need in their daily work are dramatically improved. + +Phase I: Format and initial speedups +------------------------------------ + +During this phase, Git learns to enable the sparse-index and safely parse +one. Protections are put in place so that every consumer of the in-memory +data structure can operate with its current assumption of every file at +`HEAD`. + +At first, every index parse will call a helper method, +`ensure_full_index()`, which scans the index for sparse-directory entries +(pointing to trees) and replaces them with the full list of paths (with +blob contents) by parsing tree objects. This will be slower in all cases. +The only noticeable change in behavior will be that the serialized index +file contains sparse-directory entries. + +To start, we use a new required index extension, `sdir`, to allow +inserting sparse-directory entries into indexes with file format +versions 2, 3, and 4. This prevents Git versions that do not understand +the sparse-index from operating on one, while allowing tools that do not +understand the sparse-index to operate on repositories as long as they do +not interact with the index. A new format, index v5, will be introduced +that includes sparse-directory entries by default. It might also +introduce other features that have been considered for improving the +index, as well. + +Next, consumers of the index will be guarded against operating on a +sparse-index by inserting calls to `ensure_full_index()` or +`expand_index_to_path()`. If a specific path is requested, then those will +be protected from within the `index_file_exists()` and `index_name_pos()` +API calls: they will call `ensure_full_index()` if necessary. The +intention here is to preserve existing behavior when interacting with a +sparse-checkout. We don't want a change to happen by accident, without +tests. Many of these locations may not need any change before removing the +guards, but we should not do so without tests to ensure the expected +behavior happens. + +It may be desirable to _change_ the behavior of some commands in the +presence of a sparse index or more generally in any sparse-checkout +scenario. In such cases, these should be carefully communicated and +tested. No such behavior changes are intended during this phase. + +During a scan of the codebase, not every iteration of the cache entries +needs an `ensure_full_index()` check. The basic reasons include: + +1. The loop is scanning for entries with non-zero stage. These entries + are not collapsed into a sparse-directory entry. + +2. The loop is scanning for submodules. These entries are not collapsed + into a sparse-directory entry. + +3. The loop is part of the index API, especially around reading or + writing the format. + +4. The loop is checking for correct order of cache entries and that is + correct if and only if the sparse-directory entries are in the correct + location. + +5. The loop ignores entries with the `SKIP_WORKTREE` bit set, or is + otherwise already aware of sparse directory entries. + +6. The sparse-index is disabled at this point when using the split-index + feature, so no effort is made to protect the split-index API. + +Even after inserting these guards, we will keep expanding sparse-indexes +for most Git commands using the `command_requires_full_index` repository +setting. This setting will be on by default and disabled one builtin at a +time until we have sufficient confidence that all of the index operations +are properly guarded. + +To complete this phase, the commands `git status` and `git add` will be +integrated with the sparse-index so that they operate with O(Populated) +performance. They will be carefully tested for operations within and +outside the sparse-checkout definition. + +Phase II: Careful integrations +------------------------------ + +This phase focuses on ensuring that all index extensions and APIs work +well with a sparse-index. This requires significant increases to our test +coverage, especially for operations that interact with the working +directory outside of the sparse-checkout definition. Some of these +behaviors may not be the desirable ones, such as some tests already +marked for failure in `t1092-sparse-checkout-compatibility.sh`. + +The index extensions that may require special integrations are: + +* FS Monitor +* Untracked cache + +While integrating with these features, we should look for patterns that +might lead to better APIs for interacting with the index. Coalescing +common usage patterns into an API call can reduce the number of places +where sparse-directories need to be handled carefully. + +Phase III: Important command speedups +------------------------------------- + +At this point, the patterns for testing and implementing sparse-directory +logic should be relatively stable. This phase focuses on updating some of +the most common builtins that use the index to operate as O(Populated). +Here is a potential list of commands that could be valuable to integrate +at this point: + +* `git commit` +* `git checkout` +* `git merge` +* `git rebase` + +Hopefully, commands such as `git merge` and `git rebase` can benefit +instead from merge algorithms that do not use the index as a data +structure, such as the merge-ORT strategy. As these topics mature, we +may enable the ORT strategy by default for repositories using the +sparse-index feature. + +Along with `git status` and `git add`, these commands cover the majority +of users' interactions with the working directory. In addition, we can +integrate with these commands: + +* `git grep` +* `git rm` + +These have been proposed as some whose behavior could change when in a +repo with a sparse-checkout definition. It would be good to include this +behavior automatically when using a sparse-index. Some clarity is needed +to make the behavior switch clear to the user. + +This phase is the first where parallel work might be possible without too +much conflicts between topics. + +Phase IV: The long tail +----------------------- + +This last phase is less a "phase" and more "the new normal" after all of +the previous work. + +To start, the `command_requires_full_index` option could be removed in +favor of expanding only when hitting an API guard. + +There are many Git commands that could use special attention to operate as +O(Populated), while some might be so rare that it is acceptable to leave +them with additional overhead when a sparse-index is present. + +Here are some commands that might be useful to update: + +* `git sparse-checkout set` +* `git am` +* `git clean` +* `git stash` |