diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/Makefile | 77 | ||||
-rw-r--r-- | Documentation/RelNotes/2.33.0.txt | 78 | ||||
-rw-r--r-- | Documentation/config/color.txt | 5 | ||||
-rw-r--r-- | Documentation/diff-options.txt | 8 | ||||
-rw-r--r-- | Documentation/git-describe.txt | 14 | ||||
-rw-r--r-- | Documentation/git-send-email.txt | 25 | ||||
-rw-r--r-- | Documentation/glossary-content.txt | 4 | ||||
-rw-r--r-- | Documentation/revisions.txt | 23 | ||||
-rw-r--r-- | Documentation/technical/packfile-uri.txt | 15 | ||||
-rw-r--r-- | Documentation/technical/partial-clone.txt | 6 | ||||
-rw-r--r-- | Documentation/technical/remembering-renames.txt | 671 |
11 files changed, 846 insertions, 80 deletions
diff --git a/Documentation/Makefile b/Documentation/Makefile index 2aae4c9cbb..f5605b7767 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -139,6 +139,7 @@ ASCIIDOC_CONF = -f asciidoc.conf ASCIIDOC_COMMON = $(ASCIIDOC) $(ASCIIDOC_EXTRA) $(ASCIIDOC_CONF) \ -amanversion=$(GIT_VERSION) \ -amanmanual='Git Manual' -amansource='Git' +ASCIIDOC_DEPS = asciidoc.conf GIT-ASCIIDOCFLAGS TXT_TO_HTML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_HTML) TXT_TO_XML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_DOCBOOK) MANPAGE_XSL = manpage-normal.xsl @@ -193,6 +194,7 @@ ASCIIDOC_DOCBOOK = docbook5 ASCIIDOC_EXTRA += -acompat-mode -atabsize=8 ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions ASCIIDOC_EXTRA += -alitdd='&\#x2d;&\#x2d;' +ASCIIDOC_DEPS = asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS DBLATEX_COMMON = XMLTO_EXTRA += --skip-validation XMLTO_EXTRA += -x manpage.xsl @@ -294,9 +296,7 @@ docdep_prereqs = \ cmd-list.made $(cmds_txt) doc.dep : $(docdep_prereqs) $(DOC_DEP_TXT) build-docdep.perl - $(QUIET_GEN)$(RM) $@+ $@ && \ - $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ - mv $@+ $@ + $(QUIET_GEN)$(PERL_PATH) ./build-docdep.perl >$@ $(QUIET_STDERR) ifneq ($(MAKECMDGOALS),clean) -include doc.dep @@ -316,8 +316,7 @@ cmds_txt = cmds-ancillaryinterrogators.txt \ $(cmds_txt): cmd-list.made cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) - $(QUIET_GEN)$(RM) $@ && \ - $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(cmds_txt) $(QUIET_STDERR) && \ + $(QUIET_GEN)$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(cmds_txt) $(QUIET_STDERR) && \ date >$@ mergetools_txt = mergetools-diff.txt mergetools-merge.txt @@ -325,7 +324,7 @@ mergetools_txt = mergetools-diff.txt mergetools-merge.txt $(mergetools_txt): mergetools-list.made mergetools-list.made: ../git-mergetool--lib.sh $(wildcard ../mergetools/*) - $(QUIET_GEN)$(RM) $@ && \ + $(QUIET_GEN) \ $(SHELL_PATH) -c 'MERGE_TOOLS_DIR=../mergetools && \ . ../git-mergetool--lib.sh && \ show_tool_names can_diff "* " || :' >mergetools-diff.txt && \ @@ -354,32 +353,23 @@ clean: $(RM) manpage-base-url.xsl $(RM) GIT-ASCIIDOCFLAGS -$(MAN_HTML): %.html : %.txt asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_HTML) -d manpage -o $@+ $< && \ - mv $@+ $@ +$(MAN_HTML): %.html : %.txt $(ASCIIDOC_DEPS) + $(QUIET_ASCIIDOC)$(TXT_TO_HTML) -d manpage -o $@ $< -$(OBSOLETE_HTML): %.html : %.txto asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_HTML) -o $@+ $< && \ - mv $@+ $@ +$(OBSOLETE_HTML): %.html : %.txto $(ASCIIDOC_DEPS) + $(QUIET_ASCIIDOC)$(TXT_TO_HTML) -o $@ $< manpage-base-url.xsl: manpage-base-url.xsl.in $(QUIET_GEN)sed "s|@@MAN_BASE_URL@@|$(MAN_BASE_URL)|" $< > $@ %.1 %.5 %.7 : %.xml manpage-base-url.xsl $(wildcard manpage*.xsl) - $(QUIET_XMLTO)$(RM) $@ && \ - $(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + $(QUIET_XMLTO)$(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< -%.xml : %.txt asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_XML) -d manpage -o $@+ $< && \ - mv $@+ $@ +%.xml : %.txt $(ASCIIDOC_DEPS) + $(QUIET_ASCIIDOC)$(TXT_TO_XML) -d manpage -o $@ $< user-manual.xml: user-manual.txt user-manual.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_XML) -d book -o $@+ $< && \ - mv $@+ $@ + $(QUIET_ASCIIDOC)$(TXT_TO_XML) -d book -o $@ $< technical/api-index.txt: technical/api-index-skel.txt \ technical/api-index.sh $(patsubst %,%.txt,$(API_DOCS)) @@ -400,46 +390,35 @@ XSLTOPTS += --stringparam html.stylesheet docbook-xsl.css XSLTOPTS += --param generate.consistent.ids 1 user-manual.html: user-manual.xml $(XSLT) - $(QUIET_XSLTPROC)$(RM) $@+ $@ && \ - xsltproc $(XSLTOPTS) -o $@+ $(XSLT) $< && \ - mv $@+ $@ + $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< git.info: user-manual.texi $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi user-manual.texi: user-manual.xml - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ - $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ - rm $@++ && \ - mv $@+ $@ + $(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@+ && \ + $(PERL_PATH) fix-texi.perl <$@+ >$@ && \ + $(RM) $@+ user-manual.pdf: user-manual.xml - $(QUIET_DBLATEX)$(RM) $@+ $@ && \ - $(DBLATEX) -o $@+ $(DBLATEX_COMMON) $< && \ - mv $@+ $@ + $(QUIET_DBLATEX)$(DBLATEX) -o $@ $(DBLATEX_COMMON) $< gitman.texi: $(MAN_XML) cat-texi.perl texi.xsl - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ + $(QUIET_DB2TEXI) \ ($(foreach xml,$(sort $(MAN_XML)),xsltproc -o $(xml)+ texi.xsl $(xml) && \ $(DOCBOOK2X_TEXI) --encoding=UTF-8 --to-stdout $(xml)+ && \ - rm $(xml)+ &&) true) > $@++ && \ - $(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \ - rm $@++ && \ - mv $@+ $@ + $(RM) $(xml)+ &&) true) > $@+ && \ + $(PERL_PATH) cat-texi.perl $@ <$@+ >$@ && \ + $(RM) $@+ gitman.info: gitman.texi $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - $(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \ - mv $@+ $@ + $(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@ howto-index.txt: howto-index.sh $(HOWTO_TXT) - $(QUIET_GEN)$(RM) $@+ $@ && \ - '$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(HOWTO_TXT)) >$@+ && \ - mv $@+ $@ + $(QUIET_GEN)'$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(HOWTO_TXT)) >$@ $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt $(QUIET_ASCIIDOC)$(TXT_TO_HTML) $*.txt @@ -448,10 +427,9 @@ WEBDOC_DEST = /pub/software/scm/git/docs howto/%.html: ASCIIDOC_EXTRA += -a git-relative-html-prefix=../ $(patsubst %.txt,%.html,$(HOWTO_TXT)): %.html : %.txt GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(QUIET_ASCIIDOC) \ sed -e '1,/^$$/d' $< | \ - $(TXT_TO_HTML) - >$@+ && \ - mv $@+ $@ + $(TXT_TO_HTML) - >$@ install-webdoc : html '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) @@ -492,4 +470,7 @@ doc-l10n install-l10n:: $(MAKE) -C po $@ endif +# Delete the target file on error +.DELETE_ON_ERROR: + .PHONY: FORCE diff --git a/Documentation/RelNotes/2.33.0.txt b/Documentation/RelNotes/2.33.0.txt new file mode 100644 index 0000000000..57443c7466 --- /dev/null +++ b/Documentation/RelNotes/2.33.0.txt @@ -0,0 +1,78 @@ +Git 2.33 Release Notes +====================== + +Backward compatibility notes +---------------------------- + + * The "-m" option in "git log -m" that does not specify which format, + if any, of diff is desired did not have any visible effect; it now + implies some form of diff (by default "--patch") is produced. + + You can disable the diff output with "git log -m --no-patch", but + then there probably isn't much point in passing "-m" in the first + place ;-). + + +Updates since Git 2.32 +---------------------- + +UI, Workflows & Features + + * "git send-email" learned the "--sendmail-cmd" command line option + and the "sendemail.sendmailCmd" configuration variable, which is a + more sensible approach than the current way of repurposing the + "smtp-server" that is meant to name the server to instead name the + command to talk to the server. + + * The "-m" option in "git log -m" that does not specify which format, + if any, of diff is desired did not have any visible effect; it now + implies some form of diff (by default "--patch") is produced. + + +Performance, Internal Implementation, Development Support etc. + + * The code to handle the "--format" option in "for-each-ref" and + friends made too many string comparisons on %(atom)s used in the + format string, which has been corrected by converting them into + enum when the format string is parsed. + + * Use the hashfile API in the codepath that writes the index file to + reduce code duplication. + + * Repeated rename detections in a sequence of mergy operations have + been optimize out. + + +Fixes since v2.32 +----------------- + + * We historically rejected a very short string as an author name + while accepting a patch e-mail, which has been loosened. + (merge 72ee47ceeb ef/mailinfo-short-name later to maint). + + * The parallel checkout codepath did not initialize object ID field + used to talk to the worker processes in a futureproof way. + + * Rewrite code that triggers undefined behaviour warning. + (merge aafa5df0df jn/size-t-casted-to-off-t-fix later to maint). + + * The description of "fast-forward" in the glossary has been updated. + (merge e22f2daed0 ry/clarify-fast-forward-in-glossary later to maint). + + * Recent "git clone" left a temporary directory behind when the + transport layer returned an failure. + (merge 6aacb7d861 jk/clone-clean-upon-transport-error later to maint). + + * "git fetch" over protocol v2 left its side of the socket open after + it finished speaking, which unnecessarily wasted the resource on + the other side. + (merge ae1a7eefff jk/fetch-pack-v2-half-close-early later to maint). + + * Other code cleanup, docfix, build fix, etc. + (merge bfe35a6165 ah/doc-describe later to maint). + (merge f302c1e4aa jc/clarify-revision-range later to maint). + (merge 3127ff90ea tl/fix-packfile-uri-doc later to maint). + (merge a84216c684 jk/doc-color-pager later to maint). + (merge 4e0a64a713 ab/trace2-squelch-gcc-warning later to maint). + (merge 225f7fa847 ps/rev-list-object-type-filter later to maint). + (merge 5317dfeaed dd/honor-users-tar-in-tests later to maint). diff --git a/Documentation/config/color.txt b/Documentation/config/color.txt index d5daacb13a..e05d520a86 100644 --- a/Documentation/config/color.txt +++ b/Documentation/config/color.txt @@ -127,8 +127,9 @@ color.interactive.<slot>:: interactive commands. color.pager:: - A boolean to enable/disable colored output when the pager is in - use (default is true). + A boolean to specify whether `auto` color modes should colorize + output going to the pager. Defaults to true; set this to false + if your pager does not understand ANSI color codes. color.push:: A boolean to enable/disable color in push errors. May be set to diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 530d115914..32e6dee5ac 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -49,10 +49,9 @@ ifdef::git-log[] --diff-merges=m::: -m::: This option makes diff output for merge commits to be shown in - the default format. `-m` will produce the output only if `-p` - is given as well. The default format could be changed using + the default format. The default format could be changed using `log.diffMerges` configuration parameter, which default value - is `separate`. + is `separate`. `-m` implies `-p`. + --diff-merges=first-parent::: --diff-merges=1::: @@ -62,7 +61,8 @@ ifdef::git-log[] --diff-merges=separate::: This makes merge commits show the full diff with respect to each of the parents. Separate log entry and diff is generated - for each parent. + for each parent. This is the format that `-m` produced + historically. + --diff-merges=combined::: --diff-merges=c::: diff --git a/Documentation/git-describe.txt b/Documentation/git-describe.txt index a88f6ae2c6..c6a79c2a0f 100644 --- a/Documentation/git-describe.txt +++ b/Documentation/git-describe.txt @@ -63,9 +63,10 @@ OPTIONS Automatically implies --tags. --abbrev=<n>:: - Instead of using the default 7 hexadecimal digits as the - abbreviated object name, use <n> digits, or as many digits - as needed to form a unique object name. An <n> of 0 + Instead of using the default number of hexadecimal digits (which + will vary according to the number of objects in the repository with + a default of 7) of the abbreviated object name, use <n> digits, or + as many digits as needed to form a unique object name. An <n> of 0 will suppress long format, only showing the closest tag. --candidates=<n>:: @@ -139,8 +140,11 @@ at the end. The number of additional commits is the number of commits which would be displayed by "git log v1.0.4..parent". -The hash suffix is "-g" + unambiguous abbreviation for the tip commit -of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`). +The hash suffix is "-g" + an unambigous abbreviation for the tip commit +of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`). The +length of the abbreviation scales as the repository grows, using the +approximate number of objects in the repository and a bit of math +around the birthday paradox, and defaults to a minimum of 7. The "g" prefix stands for "git" and is used to allow describing the version of a software depending on the SCM the software is managed with. This is useful in an environment where people may use different SCMs. diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt index 93708aefea..3db4eab4ba 100644 --- a/Documentation/git-send-email.txt +++ b/Documentation/git-send-email.txt @@ -167,6 +167,14 @@ Sending `sendemail.envelopeSender` configuration variable; if that is unspecified, choosing the envelope sender is left to your MTA. +--sendmail-cmd=<command>:: + Specify a command to run to send the email. The command should + be sendmail-like; specifically, it must support the `-i` option. + The command will be executed in the shell if necessary. Default + is the value of `sendemail.sendmailcmd`. If unspecified, and if + --smtp-server is also unspecified, git-send-email will search + for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH. + --smtp-encryption=<encryption>:: Specify the encryption to use, either 'ssl' or 'tls'. Any other value reverts to plain SMTP. Default is the value of @@ -211,13 +219,16 @@ a password is obtained using 'git-credential'. --smtp-server=<host>:: If set, specifies the outgoing SMTP server to use (e.g. - `smtp.example.com` or a raw IP address). Alternatively it can - specify a full pathname of a sendmail-like program instead; - the program must support the `-i` option. Default value can - be specified by the `sendemail.smtpServer` configuration - option; the built-in default is to search for `sendmail` in - `/usr/sbin`, `/usr/lib` and $PATH if such program is - available, falling back to `localhost` otherwise. + `smtp.example.com` or a raw IP address). If unspecified, and if + `--sendmail-cmd` is also unspecified, the default is to search + for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH if such a + program is available, falling back to `localhost` otherwise. ++ +For backward compatibility, this option can also specify a full pathname +of a sendmail-like program instead; the program must support the `-i` +option. This method does not support passing arguments or using plain +command names. For those use cases, consider using `--sendmail-cmd` +instead. --smtp-server-port=<port>:: Specifies a port different from the default port (SMTP diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt index 67c7a50b96..c077971335 100644 --- a/Documentation/glossary-content.txt +++ b/Documentation/glossary-content.txt @@ -146,8 +146,8 @@ current branch integrates with) obviously do not work, as there is no <<def_revision,revision>> and you are "merging" another <<def_branch,branch>>'s changes that happen to be a descendant of what you have. In such a case, you do not make a new <<def_merge,merge>> - <<def_commit,commit>> but instead just update to his - revision. This will happen frequently on a + <<def_commit,commit>> but instead just update your branch to point at the same + revision as the branch you are merging. This will happen frequently on a <<def_remote_tracking_branch,remote-tracking branch>> of a remote <<def_repository,repository>>. diff --git a/Documentation/revisions.txt b/Documentation/revisions.txt index d9169c062e..f5f17b65a1 100644 --- a/Documentation/revisions.txt +++ b/Documentation/revisions.txt @@ -260,6 +260,9 @@ any of the given commits. A commit's reachable set is the commit itself and the commits in its ancestry chain. +There are several notations to specify a set of connected commits +(called a "revision range"), illustrated below. + Commit Exclusions ~~~~~~~~~~~~~~~~~ @@ -294,6 +297,26 @@ is a shorthand for 'HEAD..origin' and asks "What did the origin do since I forked from them?" Note that '..' would mean 'HEAD..HEAD' which is an empty range that is both reachable and unreachable from HEAD. +Commands that are specifically designed to take two distinct ranges +(e.g. "git range-diff R1 R2" to compare two ranges) do exist, but +they are exceptions. Unless otherwise noted, all "git" commands +that operate on a set of commits work on a single revision range. +In other words, writing two "two-dot range notation" next to each +other, e.g. + + $ git log A..B C..D + +does *not* specify two revision ranges for most commands. Instead +it will name a single connected set of commits, i.e. those that are +reachable from either B or D but are reachable from neither A or C. +In a linear history like this: + + ---A---B---o---o---C---D + +because A and B are reachable from C, the revision range specified +by these two dotted ranges is a single commit D. + + Other <rev>{caret} Parent Shorthand Notations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Three other shorthands exist, particularly useful for merge commits, diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt index f7eabc6c76..1eb525fe76 100644 --- a/Documentation/technical/packfile-uri.txt +++ b/Documentation/technical/packfile-uri.txt @@ -35,13 +35,14 @@ include some sort of non-trivial implementation in the Minimum Viable Product, at least so that we can test the client. This is the implementation: a feature, marked experimental, that allows the -server to be configured by one or more `uploadpack.blobPackfileUri=<sha1> -<uri>` entries. Whenever the list of objects to be sent is assembled, all such -blobs are excluded, replaced with URIs. As noted in "Future work" below, the -server can evolve in the future to support excluding other objects (or other -implementations of servers could be made that support excluding other objects) -without needing a protocol change, so clients should not expect that packfiles -downloaded in this way only contain single blobs. +server to be configured by one or more `uploadpack.blobPackfileUri= +<object-hash> <pack-hash> <uri>` entries. Whenever the list of objects to be +sent is assembled, all such blobs are excluded, replaced with URIs. As noted +in "Future work" below, the server can evolve in the future to support +excluding other objects (or other implementations of servers could be made +that support excluding other objects) without needing a protocol change, so +clients should not expect that packfiles downloaded in this way only contain +single blobs. Client design ------------- diff --git a/Documentation/technical/partial-clone.txt b/Documentation/technical/partial-clone.txt index 0780d30cac..a0dd7c66f2 100644 --- a/Documentation/technical/partial-clone.txt +++ b/Documentation/technical/partial-clone.txt @@ -242,8 +242,7 @@ remote in a specific order. repository and can satisfy all such requests. - Repack essentially treats promisor and non-promisor packfiles as 2 - distinct partitions and does not mix them. Repack currently only works - on non-promisor packfiles and loose objects. + distinct partitions and does not mix them. - Dynamic object fetching invokes fetch-pack once *for each item* because most algorithms stumble upon a missing object and need to have @@ -273,9 +272,6 @@ to use those promisor remotes in that order." The user might want to work in a triangular work flow with multiple promisor remotes that each have an incomplete view of the repository. -- Allow repack to work on promisor packfiles (while keeping them distinct - from non-promisor packfiles). - - Allow non-pathname-based filters to make use of packfile bitmaps (when present). This was just an omission during the initial implementation. diff --git a/Documentation/technical/remembering-renames.txt b/Documentation/technical/remembering-renames.txt new file mode 100644 index 0000000000..2fd5cc88e0 --- /dev/null +++ b/Documentation/technical/remembering-renames.txt @@ -0,0 +1,671 @@ +Rebases and cherry-picks involve a sequence of merges whose results are +recorded as new single-parent commits. The first parent side of those +merges represent the "upstream" side, and often include a far larger set of +changes than the second parent side. Traditionally, the renames on the +first-parent side of that sequence of merges were repeatedly re-detected +for every merge. This file explains why it is safe and effective during +rebases and cherry-picks to remember renames on the upstream side of +history as an optimization, assuming all merges are automatic and clean +(i.e. no conflicts and not interrupted for user input or editing). + +Outline: + + 0. Assumptions + + 1. How rebasing and cherry-picking work + + 2. Why the renames on MERGE_SIDE1 in any given pick are *always* a + superset of the renames on MERGE_SIDE1 for the next pick. + + 3. Why any rename on MERGE_SIDE1 in any given pick is _almost_ always also + a rename on MERGE_SIDE1 for the next pick + + 4. A detailed description of the the counter-examples to #3. + + 5. Why the special cases in #4 are still fully reasonable to use to pair + up files for three-way content merging in the merge machinery, and why + they do not affect the correctness of the merge. + + 6. Interaction with skipping of "irrelevant" renames + + 7. Additional items that need to be cached + + 8. How directory rename detection interacts with the above and why this + optimization is still safe even if merge.directoryRenames is set to + "true". + + +=== 0. Assumptions === + +There are two assumptions that will hold throughout this document: + + * The upstream side where commits are transplanted to is treated as the + first parent side when rebase/cherry-pick call the merge machinery + + * All merges are fully automatic + +and a third that will hold in sections 2-5 for simplicity, that I'll later +address in section 8: + + * No directory renames occur + + +Let me explain more about each assumption and why I include it: + + +The first assumption is merely for the purposes of making this document +clearer; the optimization implementation does not actually depend upon it. +However, the assumption does hold in all cases because it reflects the way +that both rebase and cherry-pick were implemented; and the implementation +of cherry-pick and rebase are not readily changeable for backwards +compatibility reasons (see for example the discussion of the --ours and +--theirs flag in the documentation of `git checkout`, particularly the +comments about how they behave with rebase). The optimization avoids +checking first-parent-ness, though. It checks the conditions that make the +optimization valid instead, so it would still continue working if someone +changed the parent ordering that cherry-pick and rebase use. But making +this assumption does make this document much clearer and prevents me from +having to repeat every example twice. + +If the second assumption is violated, then the optimization simply is +turned off and thus isn't relevant to consider. The second assumption can +also be stated as "there is no interruption for a user to resolve conflicts +or to just further edit or tweak files". While real rebases and +cherry-picks are often interrupted (either because it's an interactive +rebase where the user requested to stop and edit, or because there were +conflicts that the user needs to resolve), the cache of renames is not +stored on disk, and thus is thrown away as soon as the rebase or cherry +pick stops for the user to resolve the operation. + +The third assumption makes sections 2-5 simpler, and allows people to +understand the basics of why this optimization is safe and effective, and +then I can go back and address the specifics in section 8. It is probably +also worth noting that if directory renames do occur, then the default of +merge.directoryRenames being set to "conflict" means that the operation +will stop for users to resolve the conflicts and the cache will be thrown +away, and thus that there won't be an optimization to apply. So, the only +reason we need to address directory renames specifically, is that some +users will have set merge.directoryRenames to "true" to allow the merges to +continue to proceed automatically. The optimization is still safe with +this config setting, but we have to discuss a few more cases to show why; +this discussion is deferred until section 8. + + +=== 1. How rebasing and cherry-picking work === + +Consider the following setup (from the git-rebase manpage): + + A---B---C topic + / + D---E---F---G main + +After rebasing or cherry-picking topic onto main, this will appear as: + + A'--B'--C' topic + / + D---E---F---G main + +The way the commits A', B', and C' are created is through a series of +merges, where rebase or cherry-pick sequentially uses each of the three +A-B-C commits in a special merge operation. Let's label the three commits +in the merge operation as MERGE_BASE, MERGE_SIDE1, and MERGE_SIDE2. For +this picture, the three commits for each of the three merges would be: + +To create A': + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + +To create B': + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + +To create C': + MERGE_BASE: B + MERGE_SIDE1: B' + MERGE_SIDE2: C + +Sometimes, folks are surprised that these three-way merges are done. It +can be useful in understanding these three-way merges to view them in a +slightly different light. For example, in creating C', you can view it as +either: + + * Apply the changes between B & C to B' + * Apply the changes between B & B' to C + +Conceptually the two statements above are the same as a three-way merge of +B, B', and C, at least the parts before you decide to record a commit. + + +=== 2. Why the renames on MERGE_SIDE1 in any given pick are always a === +=== superset of the renames on MERGE_SIDE1 for the next pick. === + +The merge machinery uses the filenames it is fed from MERGE_BASE, +MERGE_SIDE1, and MERGE_SIDE2. It will only move content to a different +filename under one of three conditions: + + * To make both pieces of a conflict available to a user during conflict + resolution (examples: directory/file conflict, add/add type conflict + such as symlink vs. regular file) + + * When MERGE_SIDE1 renames the file. + + * When MERGE_SIDE2 renames the file. + +First, let's remember what commits are involved in the first and second +picks of the cherry-pick or rebase sequence: + +To create A': + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + +To create B': + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + +So, in particular, we need to show that the renames between E and G are a +superset of those between A and A'. + +A' is created by the first merge. A' will only have renames for one of the +three reasons listed above. The first case, a conflict, results in a +situation where the cache is dropped and thus this optimization doesn't +take effect, so we need not consider that case. The third case, a rename +on MERGE_SIDE2 (i.e. from G to A), will show up in A' but it also shows up +in A -- therefore when diffing A and A' that path does not show up as a +rename. The only remaining way for renames to show up in A' is for the +rename to come from MERGE_SIDE1. Therefore, all renames between A and A' +are a subset of those between E and G. Equivalently, all renames between E +and G are a superset of those between A and A'. + + +=== 3. Why any rename on MERGE_SIDE1 in any given pick is _almost_ === +=== always also a rename on MERGE_SIDE1 for the next pick. === + +Let's again look at the first two picks: + +To create A': + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + +To create B': + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + +Now let's look at any given rename from MERGE_SIDE1 of the first pick, i.e. +any given rename from E to G. Let's use the filenames 'oldfile' and +'newfile' for demonstration purposes. That first pick will function as +follows; when the rename is detected, the merge machinery will do a +three-way content merge of the following: + E:oldfile + G:newfile + A:oldfile +and produce a new result: + A':newfile + +Note above that I've assumed that E->A did not rename oldfile. If that +side did rename, then we most likely have a rename/rename(1to2) conflict +that will cause the rebase or cherry-pick operation to halt and drop the +in-memory cache of renames and thus doesn't need to be considered further. +In the special case that E->A does rename the file but also renames it to +newfile, then there is no conflict from the renaming and the merge can +succeed. In this special case, the rename is not valid to cache because +the second merge will find A:newfile in the MERGE_BASE (see also the new +testcases in t6429 with "rename same file identically" in their +description). So a rename/rename(1to1) needs to be specially handled by +pruning renames from the cache and decrementing the dir_rename_counts in +the current and leading directories associated with those renames. Or, +since these are really rare, one could just take the easy way out and +disable the remembering renames optimization when a rename/rename(1to1) +happens. + +The previous paragraph handled the cases for E->A renaming oldfile, let's +continue assuming that oldfile is not renamed in A. + +As per the diagram for creating B', MERGE_SIDE1 involves the changes from A +to A'. So, we are curious whether A:oldfile and A':newfile will be viewed +as renames. Note that: + + * There will be no A':oldfile (because there could not have been a + G:oldfile as we do not do break detection in the merge machinery and + G:newfile was detected as a rename, and by the construction of the + rename above that merged cleanly, the merge machinery will ensure there + is no 'oldfile' in the result). + + * There will be no A:newfile (if there had been, we would have had a + rename/add conflict). + + * Clearly A:oldfile and A':newfile are "related" (A':newfile came from a + clean three-way content merge involving A:oldfile). + +We can also expound on the third point above, by noting that three-way +content merges can also be viewed as applying the differences between the +base and one side to the other side. Thus we can view A':newfile as +having been created by taking the changes between E:oldfile and G:newfile +(which were detected as being related, i.e. <50% changed) to A:oldfile. + +Thus A:oldfile and A':newfile are just as related as E:oldfile and +G:newfile are -- they have exactly identical differences. Since the latter +were detected as renames, A:oldfile and A':newfile should also be +detectable as renames almost always. + + +=== 4. A detailed description of the counter-examples to #3. === + +We already noted in section 3 that rename/rename(1to1) (i.e. both sides +renaming a file the same way) was one counter-example. The more +interesting bit, though, is why did we need to use the "almost" qualifier +when stating that A:oldfile and A':newfile are "almost" always detectable +as renames? + +Let's repeat an earlier point that section 3 made: + + A':newfile was created by applying the changes between E:oldfile and + G:newfile to A:oldfile. The changes between E:oldfile and G:newfile were + <50% of the size of E:oldfile. + +If those changes that were <50% of the size of E:oldfile are also <50% of +the size of A:oldfile, then A:oldfile and A':newfile will be detectable as +renames. However, if there is a dramatic size reduction between E:oldfile +and A:oldfile (but the changes between E:oldfile, G:newfile, and A:oldfile +still somehow merge cleanly), then traditional rename detection would not +detect A:oldfile and A':newfile as renames. + +Here's an example where that can happen: + * E:oldfile had 20 lines + * G:newfile added 10 new lines at the beginning of the file + * A:oldfile kept the first 3 lines of the file, and deleted all the rest +then + => A':newfile would have 13 lines, 3 of which matches those in A:oldfile. +E:oldfile -> G:newfile would be detected as a rename, but A:oldfile and +A':newfile would not be. + + +=== 5. Why the special cases in #4 are still fully reasonable to use to === +=== pair up files for three-way content merging in the merge machinery, === +=== and why they do not affect the correctness of the merge. === + +In the rename/rename(1to1) case, A:newfile and A':newfile are not renames +since they use the *same* filename. However, files with the same filename +are obviously fine to pair up for three-way content merging (the merge +machinery has never employed break detection). The interesting +counter-example case is thus not the rename/rename(1to1) case, but the case +where A did not rename oldfile. That was the case that we spent most of +the time discussing in sections 3 and 4. The remainder of this section +will be devoted to that case as well. + +So, even if A:oldfile and A':newfile aren't detectable as renames, why is +it still reasonable to pair them up for three-way content merging in the +merge machinery? There are multiple reasons: + + * As noted in sections 3 and 4, the diff between A:oldfile and A':newfile + is *exactly* the same as the diff between E:oldfile and G:newfile. The + latter pair were detected as renames, so it seems unlikely to surprise + users for us to treat A:oldfile and A':newfile as renames. + + * In fact, "oldfile" and "newfile" were at one point detected as renames + due to how they were constructed in the E..G chain. And we used that + information once already in this rebase/cherry-pick. I think users + would be unlikely to be surprised at us continuing to treat the files + as renames and would quickly understand why we had done so. + + * Marking or declaring files as renames is *not* the end goal for merges. + Merges use renames to determine which files make sense to be paired up + for three-way content merges. + + * A:oldfile and A':newfile were _already_ paired up in a three-way + content merge; that is how A':newfile was created. In fact, that + three-way content merge was clean. So using them again in a later + three-way content merge seems very reasonable. + +However, the above is focusing on the common scenarios. Let's try to look +at all possible unusual scenarios and compare without the optimization to +with the optimization. Consider the following theoretical cases; we will +then dive into each to determine which of them are possible, +and if so, what they mean: + + 1. Without the optimization, the second merge results in a conflict. + With the optimization, the second merge also results in a conflict. + Questions: Are the conflicts confusingly different? Better in one case? + + 2. Without the optimization, the second merge results in NO conflict. + With the optimization, the second merge also results in NO conflict. + Questions: Are the merges the same? + + 3. Without the optimization, the second merge results in a conflict. + With the optimization, the second merge results in NO conflict. + Questions: Possible? Bug, bugfix, or something else? + + 4. Without the optimization, the second merge results in NO conflict. + With the optimization, the second merge results in a conflict. + Questions: Possible? Bug, bugfix, or something else? + +I'll consider all four cases, but out of order. + +The fourth case is impossible. For the code without the remembering +renames optimization to not get a conflict, B:oldfile would need to exactly +match A:oldfile -- if it doesn't, there would be a modify/delete conflict. +If A:oldfile matches B:oldfile exactly, then a three-way content merge +between A:oldfile, A':newfile, and B:oldfile would have no conflict and +just give us the version of newfile from A' as the result. + +From the same logic as the above paragraph, the second case would indeed +result in identical merges. When A:oldfile exactly matches B:oldfile, an +undetected rename would say, "Oh, I see one side didn't modify 'oldfile' +and the other side deleted it. I'll delete it. And I see you have this +brand new file named 'newfile' in A', so I'll keep it." That gives the +same results as three-way content merging A:oldfile, A':newfile, and +B:oldfile -- a removal of oldfile with the version of newfile from A' +showing up in the result. + +The third case is interesting. It means that A:oldfile and A':newfile were +not just similar enough, but that the changes between them did not conflict +with the changes between A:oldfile and B:oldfile. This would validate our +hunch that the files were similar enough to be used in a three-way content +merge, and thus seems entirely correct for us to have used them that way. +(Sidenote: One particular example here may be enlightening. Let's say that +B was an immediate revert of A. B clearly would have been a clean revert +of A, since A was B's immediate parent. One would assume that if you can +pick a commit, you should also be able to cherry-pick its immediate revert. +However, this is one of those funny corner cases; without this +optimization, we just successfully picked a commit cleanly, but we are +unable to cherry-pick its immediate revert due to the size differences +between E:oldfile and A:oldfile.) + +That leaves only the first case to consider -- when we get conflicts both +with or without the optimization. Without the optimization, we'll have a +modify/delete conflict, where both A':newfile and B:oldfile are left in the +tree for the user to deal with and no hints about the potential similarity +between the two. With the optimization, we'll have a three-way content +merged A:oldfile, A':newfile, and B:oldfile with conflict markers +suggesting we thought the files were related but giving the user the chance +to resolve. As noted above, I don't think users will find us treating +'oldfile' and 'newfile' as related as a surprise since they were between E +and G. In any event, though, this case shouldn't be concerning since we +hit a conflict in both cases, told the user what we know, and asked them to +resolve it. + +So, in summary, case 4 is impossible, case 2 yields the same behavior, and +cases 1 and 3 seem to provide as good or better behavior with the +optimization than without. + + +=== 6. Interaction with skipping of "irrelevant" renames === + +Previous optimizations involved skipping rename detection for paths +considered to be "irrelevant". See for example the following commits: + + * 32a56dfb99 ("merge-ort: precompute subset of sources for which we + need rename detection", 2021-03-11) + * 2fd9eda462 ("merge-ort: precompute whether directory rename + detection is needed", 2021-03-11) + * 9bd342137e ("diffcore-rename: determine which relevant_sources are + no longer relevant", 2021-03-13) + +Relevance is always determined by what the _other_ side of history has +done, in terms of modifing a file that our side renamed, or adding a +file to a directory which our side renamed. This means that a path +that is "irrelevant" when picking the first commit of a series in a +rebase or cherry-pick, may suddenly become "relevant" when picking the +next commit. + +The upshot of this is that we can only cache rename detection results +for relevant paths, and need to re-check relevance in subsequent +commits. If those subsequent commits have additional paths that are +relevant for rename detection, then we will need to redo rename +detection -- though we can limit it to the paths for which we have not +already detected renames. + + +=== 7. Additional items that need to be cached === + +It turns out we have to cache more than just renames; we also cache: + + A) non-renames (i.e. unpaired deletes) + B) counts of renames within directories + C) sources that were marked as RELEVANT_LOCATION, but which were + downgraded to RELEVANT_NO_MORE + D) the toplevel trees involved in the merge + +These are all stored in struct rename_info, and respectively appear in + * cached_pairs (along side actual renames, just with a value of NULL) + * dir_rename_counts + * cached_irrelevant + * merge_trees + +The reason for (A) comes from the irrelevant renames skipping +optimization discussed in section 6. The fact that irrelevant renames +are skipped means we only get a subset of the potential renames +detected and subsequent commits may need to run rename detection on +the upstream side on a subset of the remaining renames (to get the +renames that are relevant for that later commit). Since unpaired +deletes are involved in rename detection too, we don't want to +repeatedly check that those paths remain unpaired on the upstream side +with every commit we are transplanting. + +The reason for (B) is that diffcore_rename_extended() is what +generates the counts of renames by directory which is needed in +directory rename detection, and if we don't run +diffcore_rename_extended() again then we need to have the output from +it, including dir_rename_counts, from the previous run. + +The reason for (C) is that merge-ort's tree traversal will again think +those paths are relevant (marking them as RELEVANT_LOCATION), but the +fact that they were downgraded to RELEVANT_NO_MORE means that +dir_rename_counts already has the information we need for directory +rename detection. (A path which becomes RELEVANT_CONTENT in a +subsequent commit will be removed from cached_irrelevant.) + +The reason for (D) is that is how we determine whether the remember +renames optimization can be used. In particular, remembering that our +sequence of merges looks like: + + Merge 1: + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + => Creates A' + + Merge 2: + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + => Creates B' + +It is the fact that the trees A and A' appear both in Merge 1 and in +Merge 2, with A as a parent of A' that allows this optimization. So +we store the trees to compare with what we are asked to merge next +time. + + +=== 8. How directory rename detection interacts with the above and === +=== why this optimization is still safe even if === +=== merge.directoryRenames is set to "true". === + +As noted in the assumptions section: + + """ + ...if directory renames do occur, then the default of + merge.directoryRenames being set to "conflict" means that the operation + will stop for users to resolve the conflicts and the cache will be + thrown away, and thus that there won't be an optimization to apply. + So, the only reason we need to address directory renames specifically, + is that some users will have set merge.directoryRenames to "true" to + allow the merges to continue to proceed automatically. + """ + +Let's remember that we need to look at how any given pick affects the next +one. So let's again use the first two picks from the diagram in section +one: + + First pick does this three-way merge: + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + => creates A' + + Second pick does this three-way merge: + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + => creates B' + +Now, directory rename detection exists so that if one side of history +renames a directory, and the other side adds a new file to the old +directory, then the merge (with merge.directoryRenames=true) can move the +file into the new directory. There are two qualitatively different ways to +add a new file to an old directory: create a new file, or rename a file +into that directory. Also, directory renames can be done on either side of +history, so there are four cases to consider: + + * MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to old dir + * MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames file into old dir + * MERGE_SIDE1 adds new file to old dir, MERGE_SIDE2 renames old dir + * MERGE_SIDE1 renames file into old dir, MERGE_SIDE2 renames old dir + +One last note before we consider these four cases: There are some +important properties about how we implement this optimization with +respect to directory rename detection that we need to bear in mind +while considering all of these cases: + + * rename caching occurs *after* applying directory renames + + * a rename created by directory rename detection is recorded for the side + of history that did the directory rename. + + * dir_rename_counts, the nested map of + {oldname => {newname => count}}, + is cached between runs as well. This basically means that directory + rename detection is also cached, though only on the side of history + that we cache renames for (MERGE_SIDE1 as far as this document is + concerned; see the assumptions section). Two interesting sub-notes + about these counts: + + * If we need to perform rename-detection again on the given side (e.g. + some paths are relevant for rename detection that weren't before), + then we clear dir_rename_counts and recompute it, making use of + cached_pairs. The reason it is important to do this is optimizations + around RELEVANT_LOCATION exist to prevent us from computing + unnecessary renames for directory rename detection and from computing + dir_rename_counts for irrelevant directories; but those same renames + or directories may become necessary for subsequent merges. The + easiest way to "fix up" dir_rename_counts in such cases is to just + recompute it. + + * If we prune rename/rename(1to1) entries from the cache, then we also + need to update dir_rename_counts to decrement the counts for the + involved directory and any relevant parent directories (to undo what + update_dir_rename_counts() in diffcore-rename.c incremented when the + rename was initially found). If we instead just disable the + remembering renames optimization when the exceedingly rare + rename/rename(1to1) cases occur, then dir_rename_counts will get + re-computed the next time rename detection occurs, as noted above. + + * the side with multiple commits to pick, is the side of history that we + do NOT cache renames for. Thus, there are no additional commits to + change the number of renames in a directory, except for those done by + directory rename detection (which always pad the majority). + + * the "renames" we cache are modified slightly by any directory rename, + as noted below. + +Now, with those notes out of the way, let's go through the four cases +in order: + +Case 1: MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to old dir + + This case looks like this: + + MERGE_BASE: E, Has olddir/ + MERGE_SIDE1: G, Renames olddir/ -> newdir/ + MERGE_SIDE2: A, Adds olddir/newfile + => creates A', With newdir/newfile + + MERGE_BASE: A, Has olddir/newfile + MERGE_SIDE1: A', Has newdir/newfile + MERGE_SIDE2: B, Modifies olddir/newfile + => expected B', with threeway-merged newdir/newfile from above + + In this case, with the optimization, note that after the first commit: + * MERGE_SIDE1 remembers olddir/ -> newdir/ + * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile + Given the cached rename noted above, the second merge can proceed as + expected without needing to perform rename detection from A -> A'. + +Case 2: MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames file into old dir + + This case looks like this: + MERGE_BASE: E oldfile, olddir/ + MERGE_SIDE1: G oldfile, olddir/ -> newdir/ + MERGE_SIDE2: A oldfile -> olddir/newfile + => creates A', With newdir/newfile representing original oldfile + + MERGE_BASE: A olddir/newfile + MERGE_SIDE1: A' newdir/newfile + MERGE_SIDE2: B modify olddir/newfile + => expected B', with threeway-merged newdir/newfile from above + + In this case, with the optimization, note that after the first commit: + * MERGE_SIDE1 remembers olddir/ -> newdir/ + * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile + (NOT oldfile -> newdir/newfile; compare to case with + (p->status == 'R' && new_path) in possibly_cache_new_pair()) + + Given the cached rename noted above, the second merge can proceed as + expected without needing to perform rename detection from A -> A'. + +Case 3: MERGE_SIDE1 adds new file to old dir, MERGE_SIDE2 renames old dir + + This case looks like this: + + MERGE_BASE: E, Has olddir/ + MERGE_SIDE1: G, Adds olddir/newfile + MERGE_SIDE2: A, Renames olddir/ -> newdir/ + => creates A', With newdir/newfile + + MERGE_BASE: A, Has newdir/, but no notion of newdir/newfile + MERGE_SIDE1: A', Has newdir/newfile + MERGE_SIDE2: B, Has newdir/, but no notion of newdir/newfile + => expected B', with newdir/newfile from A' + + In this case, with the optimization, note that after the first commit there + were no renames on MERGE_SIDE1, and any renames on MERGE_SIDE2 are tossed. + But the second merge didn't need any renames so this is fine. + +Case 4: MERGE_SIDE1 renames file into old dir, MERGE_SIDE2 renames old dir + + This case looks like this: + + MERGE_BASE: E, Has olddir/ + MERGE_SIDE1: G, Renames oldfile -> olddir/newfile + MERGE_SIDE2: A, Renames olddir/ -> newdir/ + => creates A', With newdir/newfile representing original oldfile + + MERGE_BASE: A, Has oldfile + MERGE_SIDE1: A', Has newdir/newfile + MERGE_SIDE2: B, Modifies oldfile + => expected B', with threeway-merged newdir/newfile from above + + In this case, with the optimization, note that after the first commit: + * MERGE_SIDE1 remembers oldfile -> newdir/newfile + (NOT oldfile -> olddir/newfile; compare to case of second + block under p->status == 'R' in possibly_cache_new_pair()) + * MERGE_SIDE2 renames are tossed because only MERGE_SIDE1 is remembered + + Given the cached rename noted above, the second merge can proceed as + expected without needing to perform rename detection from A -> A'. + +Finally, I'll just note here that interactions with the +skip-irrelevant-renames optimization means we sometimes don't detect +renames for any files within a directory that was renamed, in which +case we will not have been able to detect any rename for the directory +itself. In such a case, we do not know whether the directory was +renamed; we want to be careful to avoid cacheing some kind of "this +directory was not renamed" statement. If we did, then a subsequent +commit being rebased could add a file to the old directory, and the +user would expect it to end up in the correct directory -- something +our erroneous "this directory was not renamed" cache would preclude. |