diff options
Diffstat (limited to 'Documentation')
149 files changed, 6104 insertions, 1120 deletions
diff --git a/Documentation/CodingGuidelines b/Documentation/CodingGuidelines index 45465bc0c9..711cb9171e 100644 --- a/Documentation/CodingGuidelines +++ b/Documentation/CodingGuidelines @@ -175,6 +175,11 @@ For shell scripts specifically (not exhaustive): does not have such a problem. + - Even though "local" is not part of POSIX, we make heavy use of it + in our test suite. We do not use it in scripted Porcelains, and + hopefully nobody starts using "local" before they are reimplemented + in C ;-) + For C programs: @@ -498,7 +503,12 @@ Error Messages - Do not end error messages with a full stop. - - Do not capitalize ("unable to open %s", not "Unable to open %s") + - Do not capitalize the first word, only because it is the first word + in the message ("unable to open %s", not "Unable to open %s"). But + "SHA-3 not supported" is fine, because the reason the first word is + capitalized is not because it is at the beginning of the sentence, + but because the word would be spelled in capital letters even when + it appeared in the middle of the sentence. - Say what the error is first ("cannot open %s", not "%s: cannot open") @@ -541,6 +551,51 @@ Writing Documentation: documentation, please see the documentation-related advice in the Documentation/SubmittingPatches file). + In order to ensure the documentation is inclusive, avoid assuming + that an unspecified example person is male or female, and think + twice before using "he", "him", "she", or "her". Here are some + tips to avoid use of gendered pronouns: + + - Prefer succinctness and matter-of-factly describing functionality + in the abstract. E.g. + + --short:: Emit output in the short-format. + + and avoid something like these overly verbose alternatives: + + --short:: Use this to emit output in the short-format. + --short:: You can use this to get output in the short-format. + --short:: A user who prefers shorter output could.... + --short:: Should a person and/or program want shorter output, he + she/they/it can... + + This practice often eliminates the need to involve human actors in + your description, but it is a good practice regardless of the + avoidance of gendered pronouns. + + - When it becomes awkward to stick to this style, prefer "you" when + addressing the the hypothetical user, and possibly "we" when + discussing how the program might react to the user. E.g. + + You can use this option instead of --xyz, but we might remove + support for it in future versions. + + while keeping in mind that you can probably be less verbose, e.g. + + Use this instead of --xyz. This option might be removed in future + versions. + + - If you still need to refer to an example person that is + third-person singular, you may resort to "singular they" to avoid + "he/she/him/her", e.g. + + A contributor asks their upstream to pull from them. + + Note that this sounds ungrammatical and unnatural to those who + learned that "they" is only used for third-person plural, e.g. + those who learn English as a second language in some parts of the + world. + Every user-visible change should be reflected in the documentation. The same general rule as for code applies -- imitate the existing conventions. diff --git a/Documentation/Makefile b/Documentation/Makefile index b980407059..f5605b7767 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -2,6 +2,8 @@ MAN1_TXT = MAN5_TXT = MAN7_TXT = +HOWTO_TXT = +DOC_DEP_TXT = TECH_DOCS = ARTICLES = SP_ARTICLES = @@ -21,6 +23,7 @@ MAN1_TXT += gitweb.txt MAN5_TXT += gitattributes.txt MAN5_TXT += githooks.txt MAN5_TXT += gitignore.txt +MAN5_TXT += gitmailmap.txt MAN5_TXT += gitmodules.txt MAN5_TXT += gitrepository-layout.txt MAN5_TXT += gitweb.conf.txt @@ -41,6 +44,11 @@ MAN7_TXT += gittutorial-2.txt MAN7_TXT += gittutorial.txt MAN7_TXT += gitworkflows.txt +HOWTO_TXT += $(wildcard howto/*.txt) + +DOC_DEP_TXT += $(wildcard *.txt) +DOC_DEP_TXT += $(wildcard config/*.txt) + ifdef MAN_FILTER MAN_TXT = $(filter $(MAN_FILTER),$(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)) else @@ -75,6 +83,7 @@ SP_ARTICLES += howto/rebuild-from-update-hook SP_ARTICLES += howto/rebase-from-internal-branch SP_ARTICLES += howto/keep-canonical-history-correct SP_ARTICLES += howto/maintain-git +SP_ARTICLES += howto/coordinate-embargoed-releases API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt))) SP_ARTICLES += $(API_DOCS) @@ -89,6 +98,7 @@ TECH_DOCS += technical/multi-pack-index TECH_DOCS += technical/pack-format TECH_DOCS += technical/pack-heuristics TECH_DOCS += technical/pack-protocol +TECH_DOCS += technical/parallel-checkout TECH_DOCS += technical/partial-clone TECH_DOCS += technical/protocol-capabilities TECH_DOCS += technical/protocol-common @@ -129,6 +139,7 @@ ASCIIDOC_CONF = -f asciidoc.conf ASCIIDOC_COMMON = $(ASCIIDOC) $(ASCIIDOC_EXTRA) $(ASCIIDOC_CONF) \ -amanversion=$(GIT_VERSION) \ -amanmanual='Git Manual' -amansource='Git' +ASCIIDOC_DEPS = asciidoc.conf GIT-ASCIIDOCFLAGS TXT_TO_HTML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_HTML) TXT_TO_XML = $(ASCIIDOC_COMMON) -b $(ASCIIDOC_DOCBOOK) MANPAGE_XSL = manpage-normal.xsl @@ -183,6 +194,7 @@ ASCIIDOC_DOCBOOK = docbook5 ASCIIDOC_EXTRA += -acompat-mode -atabsize=8 ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions ASCIIDOC_EXTRA += -alitdd='&\#x2d;&\#x2d;' +ASCIIDOC_DEPS = asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS DBLATEX_COMMON = XMLTO_EXTRA += --skip-validation XMLTO_EXTRA += -x manpage.xsl @@ -283,10 +295,8 @@ docdep_prereqs = \ mergetools-list.made $(mergetools_txt) \ cmd-list.made $(cmds_txt) -doc.dep : $(docdep_prereqs) $(wildcard *.txt) $(wildcard config/*.txt) build-docdep.perl - $(QUIET_GEN)$(RM) $@+ $@ && \ - $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ - mv $@+ $@ +doc.dep : $(docdep_prereqs) $(DOC_DEP_TXT) build-docdep.perl + $(QUIET_GEN)$(PERL_PATH) ./build-docdep.perl >$@ $(QUIET_STDERR) ifneq ($(MAKECMDGOALS),clean) -include doc.dep @@ -306,8 +316,7 @@ cmds_txt = cmds-ancillaryinterrogators.txt \ $(cmds_txt): cmd-list.made cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) - $(QUIET_GEN)$(RM) $@ && \ - $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(cmds_txt) $(QUIET_STDERR) && \ + $(QUIET_GEN)$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(cmds_txt) $(QUIET_STDERR) && \ date >$@ mergetools_txt = mergetools-diff.txt mergetools-merge.txt @@ -315,7 +324,7 @@ mergetools_txt = mergetools-diff.txt mergetools-merge.txt $(mergetools_txt): mergetools-list.made mergetools-list.made: ../git-mergetool--lib.sh $(wildcard ../mergetools/*) - $(QUIET_GEN)$(RM) $@ && \ + $(QUIET_GEN) \ $(SHELL_PATH) -c 'MERGE_TOOLS_DIR=../mergetools && \ . ../git-mergetool--lib.sh && \ show_tool_names can_diff "* " || :' >mergetools-diff.txt && \ @@ -344,32 +353,23 @@ clean: $(RM) manpage-base-url.xsl $(RM) GIT-ASCIIDOCFLAGS -$(MAN_HTML): %.html : %.txt asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_HTML) -d manpage -o $@+ $< && \ - mv $@+ $@ +$(MAN_HTML): %.html : %.txt $(ASCIIDOC_DEPS) + $(QUIET_ASCIIDOC)$(TXT_TO_HTML) -d manpage -o $@ $< -$(OBSOLETE_HTML): %.html : %.txto asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_HTML) -o $@+ $< && \ - mv $@+ $@ +$(OBSOLETE_HTML): %.html : %.txto $(ASCIIDOC_DEPS) + $(QUIET_ASCIIDOC)$(TXT_TO_HTML) -o $@ $< manpage-base-url.xsl: manpage-base-url.xsl.in $(QUIET_GEN)sed "s|@@MAN_BASE_URL@@|$(MAN_BASE_URL)|" $< > $@ %.1 %.5 %.7 : %.xml manpage-base-url.xsl $(wildcard manpage*.xsl) - $(QUIET_XMLTO)$(RM) $@ && \ - $(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + $(QUIET_XMLTO)$(XMLTO) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< -%.xml : %.txt asciidoc.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_XML) -d manpage -o $@+ $< && \ - mv $@+ $@ +%.xml : %.txt $(ASCIIDOC_DEPS) + $(QUIET_ASCIIDOC)$(TXT_TO_XML) -d manpage -o $@ $< user-manual.xml: user-manual.txt user-manual.conf asciidoctor-extensions.rb GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(TXT_TO_XML) -d book -o $@+ $< && \ - mv $@+ $@ + $(QUIET_ASCIIDOC)$(TXT_TO_XML) -d book -o $@ $< technical/api-index.txt: technical/api-index-skel.txt \ technical/api-index.sh $(patsubst %,%.txt,$(API_DOCS)) @@ -390,46 +390,35 @@ XSLTOPTS += --stringparam html.stylesheet docbook-xsl.css XSLTOPTS += --param generate.consistent.ids 1 user-manual.html: user-manual.xml $(XSLT) - $(QUIET_XSLTPROC)$(RM) $@+ $@ && \ - xsltproc $(XSLTOPTS) -o $@+ $(XSLT) $< && \ - mv $@+ $@ + $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< git.info: user-manual.texi $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi user-manual.texi: user-manual.xml - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ - $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ - rm $@++ && \ - mv $@+ $@ + $(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@+ && \ + $(PERL_PATH) fix-texi.perl <$@+ >$@ && \ + $(RM) $@+ user-manual.pdf: user-manual.xml - $(QUIET_DBLATEX)$(RM) $@+ $@ && \ - $(DBLATEX) -o $@+ $(DBLATEX_COMMON) $< && \ - mv $@+ $@ + $(QUIET_DBLATEX)$(DBLATEX) -o $@ $(DBLATEX_COMMON) $< gitman.texi: $(MAN_XML) cat-texi.perl texi.xsl - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ + $(QUIET_DB2TEXI) \ ($(foreach xml,$(sort $(MAN_XML)),xsltproc -o $(xml)+ texi.xsl $(xml) && \ $(DOCBOOK2X_TEXI) --encoding=UTF-8 --to-stdout $(xml)+ && \ - rm $(xml)+ &&) true) > $@++ && \ - $(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \ - rm $@++ && \ - mv $@+ $@ + $(RM) $(xml)+ &&) true) > $@+ && \ + $(PERL_PATH) cat-texi.perl $@ <$@+ >$@ && \ + $(RM) $@+ gitman.info: gitman.texi $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - $(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \ - mv $@+ $@ + $(QUIET_DB2TEXI)$(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@ -howto-index.txt: howto-index.sh $(wildcard howto/*.txt) - $(QUIET_GEN)$(RM) $@+ $@ && \ - '$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(wildcard howto/*.txt)) >$@+ && \ - mv $@+ $@ +howto-index.txt: howto-index.sh $(HOWTO_TXT) + $(QUIET_GEN)'$(SHELL_PATH_SQ)' ./howto-index.sh $(sort $(HOWTO_TXT)) >$@ $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt $(QUIET_ASCIIDOC)$(TXT_TO_HTML) $*.txt @@ -437,11 +426,10 @@ $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt WEBDOC_DEST = /pub/software/scm/git/docs howto/%.html: ASCIIDOC_EXTRA += -a git-relative-html-prefix=../ -$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt GIT-ASCIIDOCFLAGS - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ +$(patsubst %.txt,%.html,$(HOWTO_TXT)): %.html : %.txt GIT-ASCIIDOCFLAGS + $(QUIET_ASCIIDOC) \ sed -e '1,/^$$/d' $< | \ - $(TXT_TO_HTML) - >$@+ && \ - mv $@+ $@ + $(TXT_TO_HTML) - >$@ install-webdoc : html '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) @@ -469,11 +457,20 @@ print-man1: @for i in $(MAN1_TXT); do echo $$i; done lint-docs:: - $(QUIET_LINT)$(PERL_PATH) lint-gitlink.perl + $(QUIET_LINT)$(PERL_PATH) lint-gitlink.perl \ + $(HOWTO_TXT) $(DOC_DEP_TXT) \ + --section=1 $(MAN1_TXT) \ + --section=5 $(MAN5_TXT) \ + --section=7 $(MAN7_TXT); \ + $(PERL_PATH) lint-man-end-blurb.perl $(MAN_TXT); \ + $(PERL_PATH) lint-man-section-order.perl $(MAN_TXT); ifeq ($(wildcard po/Makefile),po/Makefile) doc-l10n install-l10n:: $(MAKE) -C po $@ endif +# Delete the target file on error +.DELETE_ON_ERROR: + .PHONY: FORCE diff --git a/Documentation/MyFirstContribution.txt b/Documentation/MyFirstContribution.txt index 7c9a037cc2..015cf24631 100644 --- a/Documentation/MyFirstContribution.txt +++ b/Documentation/MyFirstContribution.txt @@ -47,7 +47,7 @@ Veteran contributors who are especially interested in helping mentor newcomers are present on the list. In order to avoid search indexers, group membership is required to view messages; anyone can join and no approval is required. -==== https://webchat.freenode.net/#git-devel[#git-devel] on Freenode +==== https://web.libera.chat/#git-devel[#git-devel] on Libera Chat This IRC channel is for conversations between Git contributors. If someone is currently online and knows the answer to your question, you can receive help @@ -664,7 +664,7 @@ mention the right animal somewhere: ---- test_expect_success 'runs correctly with no args and good output' ' git psuh >actual && - test_i18ngrep Pony actual + grep Pony actual ' ---- @@ -827,7 +827,7 @@ either examining recent pull requests where someone has been granted `/allow` (https://github.com/gitgitgadget/git/pulls?utf8=%E2%9C%93&q=is%3Apr+is%3Aopen+%22%2Fallow%22[Search: is:pr is:open "/allow"]), in which case both the author and the person who granted the `/allow` can now `/allow` you, or by inquiring on the -https://webchat.freenode.net/#git-devel[#git-devel] IRC channel on Freenode +https://web.libera.chat/#git-devel[#git-devel] IRC channel on Libera Chat linking your pull request and asking for someone to `/allow` you. If the CI fails, you can update your changes with `git rebase -i` and push your diff --git a/Documentation/MyFirstObjectWalk.txt b/Documentation/MyFirstObjectWalk.txt index 2d10eea7a9..45eb84d8b4 100644 --- a/Documentation/MyFirstObjectWalk.txt +++ b/Documentation/MyFirstObjectWalk.txt @@ -691,7 +691,7 @@ help understand. In our case, that means we omit trees and blobs not directly referenced by `HEAD` or `HEAD`'s history, because we begin the walk with only `HEAD` in the `pending` list.) -First, we'll need to `#include "list-objects-filter-options.h`" and set up the +First, we'll need to `#include "list-objects-filter-options.h"` and set up the `struct list_objects_filter_options` at the top of the function. ---- @@ -779,7 +779,7 @@ Count all the objects within and modify the print statement: while ((oid = oidset_iter_next(&oit))) omitted_count++; - printf("commits %d\nblobs %d\ntags %d\ntrees%d\nomitted %d\n", + printf("commits %d\nblobs %d\ntags %d\ntrees %d\nomitted %d\n", commit_count, blob_count, tag_count, tree_count, omitted_count); ---- diff --git a/Documentation/RelNotes/1.6.0.3.txt b/Documentation/RelNotes/1.6.0.3.txt index ae0577836a..ad36c0f0b7 100644 --- a/Documentation/RelNotes/1.6.0.3.txt +++ b/Documentation/RelNotes/1.6.0.3.txt @@ -50,7 +50,7 @@ Fixes since v1.6.0.2 if the working tree is currently dirty. * "git for-each-ref --format=%(subject)" fixed for commits with no - no newline in the message body. + newline in the message body. * "git remote" fixed to protect printf from user input. diff --git a/Documentation/RelNotes/1.8.4.txt b/Documentation/RelNotes/1.8.4.txt index 255e185af6..2e7529928b 100644 --- a/Documentation/RelNotes/1.8.4.txt +++ b/Documentation/RelNotes/1.8.4.txt @@ -365,7 +365,7 @@ details). (merge 2fbd4f9 mh/maint-lockfile-overflow later to maint). * Invocations of "git checkout" used internally by "git rebase" were - counted as "checkout", and affected later "git checkout -" to the + counted as "checkout", and affected later "git checkout -", which took the user to an unexpected place. (merge 3bed291 rr/rebase-checkout-reflog later to maint). diff --git a/Documentation/RelNotes/2.29.0.txt b/Documentation/RelNotes/2.29.0.txt index 06ba2f803f..1f41302ebb 100644 --- a/Documentation/RelNotes/2.29.0.txt +++ b/Documentation/RelNotes/2.29.0.txt @@ -184,8 +184,8 @@ Performance, Internal Implementation, Development Support etc. the ref backend in use, as its format is much richer than the normal refs, and written directly by "git fetch" as a plain file.. - * An unused binary has been discarded, and and a bunch of commands - have been turned into into built-in. + * An unused binary has been discarded, and a bunch of commands + have been turned into built-in. * A handful of places in in-tree code still relied on being able to execute the git subcommands, especially built-ins, in "git-foo" diff --git a/Documentation/RelNotes/2.31.0.txt b/Documentation/RelNotes/2.31.0.txt new file mode 100644 index 0000000000..cf0c7d8d40 --- /dev/null +++ b/Documentation/RelNotes/2.31.0.txt @@ -0,0 +1,365 @@ +Git 2.31 Release Notes +====================== + +Updates since v2.30 +------------------- + +Backward incompatible and other important changes + + * The "pack-redundant" command, which has been left stale with almost + unusable performance issues, now warns loudly when it gets used, as + we no longer want to recommend its use (instead just "repack -d" + instead). + + * The development community has adopted Contributor Covenant v2.0 to + update from v1.4 that we have been using. + + * The support for deprecated PCRE1 library has been dropped. + + * Fixes for CVE-2021-21300 in Git 2.30.2 (and earlier) is included. + + +UI, Workflows & Features + + * The "--format=%(trailers)" mechanism gets enhanced to make it + easier to design output for machine consumption. + + * When a user does not tell "git pull" to use rebase or merge, the + command gives a loud message telling a user to choose between + rebase or merge but creates a merge anyway, forcing users who would + want to rebase to redo the operation. Fix an early part of this + problem by tightening the condition to give the message---there is + no reason to stop or force the user to choose between rebase or + merge if the history fast-forwards. + + * The configuration variable 'core.abbrev' can be set to 'no' to + force no abbreviation regardless of the hash algorithm. + + * "git rev-parse" can be explicitly told to give output as absolute + or relative path with the `--path-format=(absolute|relative)` option. + + * Bash completion (in contrib/) update to make it easier for + end-users to add completion for their custom "git" subcommands. + + * "git maintenance" learned to drive scheduled maintenance on + platforms whose native scheduling methods are not 'cron'. + + * After expiring a reflog and making a single commit, the reflog for + the branch would record a single entry that knows both @{0} and + @{1}, but we failed to answer "what commit were we on?", i.e. @{1} + + * "git bundle" learns "--stdin" option to read its refs from the + standard input. Also, it now does not lose refs whey they point + at the same object. + + * "git log" learned a new "--diff-merges=<how>" option. + + * "git ls-files" can and does show multiple entries when the index is + unmerged, which is a source for confusion unless -s/-u option is in + use. A new option --deduplicate has been introduced. + + * `git worktree list` now annotates worktrees as prunable, shows + locked and prunable attributes in --porcelain mode, and gained + a --verbose option. + + * "git clone" tries to locally check out the branch pointed at by + HEAD of the remote repository after it is done, but the protocol + did not convey the information necessary to do so when copying an + empty repository. The protocol v2 learned how to do so. + + * There are other ways than ".." for a single token to denote a + "commit range", namely "<rev>^!" and "<rev>^-<n>", but "git + range-diff" did not understand them. + + * The "git range-diff" command learned "--(left|right)-only" option + to show only one side of the compared range. + + * "git mergetool" feeds three versions (base, local and remote) of + a conflicted path unmodified. The command learned to optionally + prepare these files with unconflicted parts already resolved. + + * The .mailmap is documented to be read only from the root level of a + working tree, but a stray file in a bare repository also was read + by accident, which has been corrected. + + * "git maintenance" tool learned a new "pack-refs" maintenance task. + + * The error message given when a configuration variable that is + expected to have a boolean value has been improved. + + * Signed commits and tags now allow verification of objects, whose + two object names (one in SHA-1, the other in SHA-256) are both + signed. + + * "git rev-list" command learned "--disk-usage" option. + + * "git {diff,log} --{skip,rotate}-to=<path>" allows the user to + discard diff output for early paths or move them to the end of the + output. + + * "git difftool" learned "--skip-to=<path>" option to restart an + interrupted session from an arbitrary path. + + * "git grep" has been tweaked to be limited to the sparse checkout + paths. + + * "git rebase --[no-]fork-point" gained a configuration variable + rebase.forkPoint so that users do not have to keep specifying a + non-default setting. + + +Performance, Internal Implementation, Development Support etc. + + * A 3-year old test that was not testing anything useful has been + corrected. + + * Retire more names with "sha1" in it. + + * The topological walk codepath is covered by new trace2 stats. + + * Update the Code-of-conduct to version 2.0 from the upstream (we've + been using version 1.4). + + * "git mktag" validates its input using its own rules before writing + a tag object---it has been updated to share the logic with "git + fsck". + + * Two new ways to feed configuration variable-value pairs via + environment variables have been introduced, and the way + GIT_CONFIG_PARAMETERS encodes variable/value pairs has been tweaked + to make it more robust. + + * Tests have been updated so that they do not to get affected by the + name of the default branch "git init" creates. + + * "git fetch" learns to treat ref updates atomically in all-or-none + fashion, just like "git push" does, with the new "--atomic" option. + + * The peel_ref() API has been replaced with peel_iterated_oid(). + + * The .use_shell flag in struct child_process that is passed to + run_command() API has been clarified with a bit more documentation. + + * Document, clean-up and optimize the code around the cache-tree + extension in the index. + + * The ls-refs protocol operation has been optimized to narrow the + sub-hierarchy of refs/ it walks to produce response. + + * When removing many branches and tags, the code used to do so one + ref at a time. There is another API it can use to delete multiple + refs, and it makes quite a lot of performance difference when the + refs are packed. + + * The "pack-objects" command needs to iterate over all the tags when + automatic tag following is enabled, but it actually iterated over + all refs and then discarded everything outside "refs/tags/" + hierarchy, which was quite wasteful. + + * A perf script was made more portable. + + * Our setting of GitHub CI test jobs were a bit too eager to give up + once there is even one failure found. Tweak the knob to allow + other jobs keep running even when we see a failure, so that we can + find more failures in a single run. + + * We've carried compatibility codepaths for compilers without + variadic macros for quite some time, but the world may be ready for + them to be removed. Force compilation failure on exotic platforms + where variadic macros are not available to find out who screams in + such a way that we can easily revert if it turns out that the world + is not yet ready. + + * Code clean-up to ensure our use of hashtables using object names as + keys use the "struct object_id" objects, not the raw hash values. + + * Lose the debugging aid that may have been useful in the past, but + no longer is, in the "grep" codepaths. + + * Some pretty-format specifiers do not need the data in commit object + (e.g. "%H"), but we were over-eager to load and parse it, which has + been made even lazier. + + * Get rid of "GETTEXT_POISON" support altogether, which may or may + not be controversial. + + * Introduce an on-disk file to record revindex for packdata, which + traditionally was always created on the fly and only in-core. + + * The commit-graph learned to use corrected commit dates instead of + the generation number to help topological revision traversal. + + * Piecemeal of rewrite of "git bisect" in C continues. + + * When a pager spawned by us exited, the trace log did not record its + exit status correctly, which has been corrected. + + * Removal of GIT_TEST_GETTEXT_POISON continues. + + * The code to implement "git merge-base --independent" was poorly + done and was kept from the very beginning of the feature. + + * Preliminary changes to fsmonitor integration. + + * Performance improvements for rename detection. + + * The common code to deal with "chunked file format" that is shared + by the multi-pack-index and commit-graph files have been factored + out, to help codepaths for both filetypes to become more robust. + + * The approach to "fsck" the incoming objects in "index-pack" is + attractive for performance reasons (we have them already in core, + inflated and ready to be inspected), but fundamentally cannot be + applied fully when we receive more than one pack stream, as a tree + object in one pack may refer to a blob object in another pack as + ".gitmodules", when we want to inspect blobs that are used as + ".gitmodules" file, for example. Teach "index-pack" to emit + objects that must be inspected later and check them in the calling + "fetch-pack" process. + + * The logic to handle "trailer" related placeholders in the + "--format=" mechanisms in the "log" family and "for-each-ref" + family is getting unified. + + * Raise the buffer size used when writing the index file out from + (obviously too small) 8kB to (clearly sufficiently large) 128kB. + + * It is reported that open() on some platforms (e.g. macOS Big Sur) + can return EINTR even though our timers are set up with SA_RESTART. + A workaround has been implemented and enabled for macOS to rerun + open() transparently from the caller when this happens. + + +Fixes since v2.30 +----------------- + + * Diagnose command line error of "git rebase" early. + + * Clean up option descriptions in "git cmd --help". + + * "git stash" did not work well in a sparsely checked out working + tree. + + * Some tests expect that "ls -l" output has either '-' or 'x' for + group executable bit, but setgid bit can be inherited from parent + directory and make these fields 'S' or 's' instead, causing test + failures. + + * "git for-each-repo --config=<var> <cmd>" should not run <cmd> for + any repository when the configuration variable <var> is not defined + even once. + + * Fix 2.29 regression where "git mergetool --tool-help" fails to list + all the available tools. + + * Fix for procedure to building CI test environment for mac. + + * The implementation of "git branch --sort" wrt the detached HEAD + display has always been hacky, which has been cleaned up. + + * Newline characters in the host and path part of git:// URL are + now forbidden. + + * "git diff" showed a submodule working tree with untracked cruft as + "Submodule commit <objectname>-dirty", but a natural expectation is + that the "-dirty" indicator would align with "git describe --dirty", + which does not consider having untracked files in the working tree + as source of dirtiness. The inconsistency has been fixed. + + * When more than one commit with the same patch ID appears on one + side, "git log --cherry-pick A...B" did not exclude them all when a + commit with the same patch ID appears on the other side. Now it + does. + + * Documentation for "git fsck" lost stale bits that has become + incorrect. + + * Doc fix for packfile URI feature. + + * When "git rebase -i" processes "fixup" insn, there is no reason to + clean up the commit log message, but we did the usual stripspace + processing. This has been corrected. + (merge f7d42ceec5 js/rebase-i-commit-cleanup-fix later to maint). + + * Fix in passing custom args from "git clone" to "upload-pack" on the + other side. + (merge ad6b5fefbd jv/upload-pack-filter-spec-quotefix later to maint). + + * The command line completion (in contrib/) completed "git branch -d" + with branch names, but "git branch -D" offered tagnames in addition, + which has been corrected. "git branch -M" had the same problem. + (merge 27dc071b9a jk/complete-branch-force-delete later to maint). + + * When commands are started from a subdirectory, they may have to + compare the path to the subdirectory (called prefix and found out + from $(pwd)) with the tracked paths. On macOS, $(pwd) and + readdir() yield decomposed path, while the tracked paths are + usually normalized to the precomposed form, causing mismatch. This + has been fixed by taking the same approach used to normalize the + command line arguments. + (merge 5c327502db tb/precompose-prefix-too later to maint). + + * Even though invocations of "die()" were logged to the trace2 + system, "BUG()"s were not, which has been corrected. + (merge 0a9dde4a04 jt/trace2-BUG later to maint). + + * "git grep --untracked" is meant to be "let's ALSO find in these + files on the filesystem" when looking for matches in the working + tree files, and does not make any sense if the primary search is + done against the index, or the tree objects. The "--cached" and + "--untracked" options have been marked as mutually incompatible. + (merge 0c5d83b248 mt/grep-cached-untracked later to maint). + + * Fix "git fsck --name-objects" which apparently has not been used by + anybody who is motivated enough to report breakage. + (merge e89f89361c js/fsck-name-objects-fix later to maint). + + * Avoid individual tests in t5411 from getting affected by each other + by forcing them to use separate output files during the test. + (merge 822ee894f6 jx/t5411-unique-filenames later to maint). + + * Test to make sure "git rev-parse one-thing one-thing" gives + the same thing twice (when one-thing is --since=X). + (merge a5cdca4520 ew/rev-parse-since-test later to maint). + + * When certain features (e.g. grafts) used in the repository are + incompatible with the use of the commit-graph, we used to silently + turned commit-graph off; we now tell the user what we are doing. + (merge c85eec7fc3 js/commit-graph-warning later to maint). + + * Objects that lost references can be pruned away, even when they + have notes attached to it (and these notes will become dangling, + which in turn can be pruned with "git notes prune"). This has been + clarified in the documentation. + (merge fa9ab027ba mz/doc-notes-are-not-anchors later to maint). + + * The error codepath around the "--temp/--prefix" feature of "git + checkout-index" has been improved. + (merge 3f7ba60350 mt/checkout-index-corner-cases later to maint). + + * The "git maintenance register" command had trouble registering bare + repositories, which had been corrected. + + * A handful of multi-word configuration variable names in + documentation that are spelled in all lowercase have been corrected + to use the more canonical camelCase. + (merge 7dd0eaa39c dl/doc-config-camelcase later to maint). + + * "git push $there --delete ''" should have been diagnosed as an + error, but instead turned into a matching push, which has been + corrected. + (merge 20e416409f jc/push-delete-nothing later to maint). + + * Test script modernization. + (merge 488acf15df sv/t7001-modernize later to maint). + + * An under-allocation for the untracked cache data has been corrected. + (merge 6347d649bc jh/untracked-cache-fix later to maint). + + * Other code cleanup, docfix, build fix, etc. + (merge e3f5da7e60 sg/t7800-difftool-robustify later to maint). + (merge 9d336655ba js/doc-proto-v2-response-end later to maint). + (merge 1b5b8cf072 jc/maint-column-doc-typofix later to maint). + (merge 3a837b58e3 cw/pack-config-doc later to maint). + (merge 01168a9d89 ug/doc-commit-approxidate later to maint). + (merge b865734760 js/params-vs-args later to maint). diff --git a/Documentation/RelNotes/2.31.1.txt b/Documentation/RelNotes/2.31.1.txt new file mode 100644 index 0000000000..f9b06b8e1b --- /dev/null +++ b/Documentation/RelNotes/2.31.1.txt @@ -0,0 +1,27 @@ +Git 2.31.1 Release Notes +======================== + +Fixes since v2.31 +----------------- + + * The fsmonitor interface read from its input without making sure + there is something to read from. This bug is new in 2.31 + timeframe. + + * The data structure used by fsmonitor interface was not properly + duplicated during an in-core merge, leading to use-after-free etc. + + * "git bisect" reimplemented more in C during 2.30 timeframe did not + take an annotated tag as a good/bad endpoint well. This regression + has been corrected. + + * Fix macros that can silently inject unintended null-statements. + + * CALLOC_ARRAY() macro replaces many uses of xcalloc(). + + * Update insn in Makefile comments to run fuzz-all target. + + * Fix a corner case bug in "git mv" on case insensitive systems, + which was introduced in 2.29 timeframe. + +Also contains various documentation updates and code clean-ups. diff --git a/Documentation/RelNotes/2.32.0.txt b/Documentation/RelNotes/2.32.0.txt new file mode 100644 index 0000000000..87d56fa1aa --- /dev/null +++ b/Documentation/RelNotes/2.32.0.txt @@ -0,0 +1,416 @@ +Git 2.32 Release Notes +====================== + +Backward compatibility notes +---------------------------- + + * ".gitattributes", ".gitignore", and ".mailmap" files that are + symbolic links are ignored. + + * "git apply --3way" used to first attempt a straight application, + and only fell back to the 3-way merge algorithm when the stright + application failed. Starting with this version, the command will + first try the 3-way merge algorithm and only when it fails (either + resulting with conflict or the base versions of blobs are missing), + falls back to the usual patch application. + + +Updates since v2.31 +------------------- + +UI, Workflows & Features + + * It does not make sense to make ".gitattributes", ".gitignore" and + ".mailmap" symlinks, as they are supposed to be usable from the + object store (think: bare repositories where HEAD:.mailmap etc. are + used). When these files are symbolic links, we used to read the + contents of the files pointed by them by mistake, which has been + corrected. + + * "git stash show" learned to optionally show untracked part of the + stash. + + * "git log --format='...'" learned "%(describe)" placeholder. + + * "git repack" so far has been only capable of repacking everything + under the sun into a single pack (or split by size). A cleverer + strategy to reduce the cost of repacking a repository has been + introduced. + + * The http codepath learned to let the credential layer to cache the + password used to unlock a certificate that has successfully been + used. + + * "git commit --fixup=<commit>", which was to tweak the changes made + to the contents while keeping the original log message intact, + learned "--fixup=(amend|reword):<commit>", that can be used to + tweak both the message and the contents, and only the message, + respectively. + + * "git send-email" learned to honor the core.hooksPath configuration. + + * "git format-patch -v<n>" learned to allow a reroll count that is + not an integer. + + * "git commit" learned "--trailer <key>[=<value>]" option; together + with the interpret-trailers command, this will make it easier to + support custom trailers. + + * "git clone --reject-shallow" option fails the clone as soon as we + notice that we are cloning from a shallow repository. + + * A configuration variable has been added to force tips of certain + refs to be given a reachability bitmap. + + * "gitweb" learned "e-mail privacy" feature to redact strings that + look like e-mail addresses on various pages. + + * "git apply --3way" has always been "to fall back to 3-way merge + only when straight application fails". Swap the order of falling + back so that 3-way is always attempted first (only when the option + is given, of course) and then straight patch application is used as + a fallback when it fails. + + * "git apply" now takes "--3way" and "--cached" at the same time, and + work and record results only in the index. + + * The command line completion (in contrib/) has learned that + CHERRY_PICK_HEAD is a possible pseudo-ref. + + * Userdiff patterns for "Scheme" has been added. + + * "git log" learned "--diff-merges=<style>" option, with an + associated configuration variable log.diffMerges. + + * "git log --format=..." placeholders learned %ah/%ch placeholders to + request the --date=human output. + + * Replace GIT_CONFIG_NOSYSTEM mechanism to decline from reading the + system-wide configuration file with GIT_CONFIG_SYSTEM that lets + users specify from which file to read the system-wide configuration + (setting it to an empty file would essentially be the same as + setting NOSYSTEM), and introduce GIT_CONFIG_GLOBAL to override the + per-user configuration in $HOME/.gitconfig. + + * "git add" and "git rm" learned not to touch those paths that are + outside of sparse checkout. + + * "git rev-list" learns the "--filter=object:type=<type>" option, + which can be used to exclude objects of the given kind from the + packfile generated by pack-objects. + + * The command line completion (in contrib/) for "git stash" has been + updated. + + * "git subtree" updates. + + * It is now documented that "format-patch" skips merges. + + * Options to "git pack-objects" that take numeric values like + --window and --depth should not accept negative values; the input + validation has been tightened. + + * The way the command line specified by the trailer.<token>.command + configuration variable receives the end-user supplied value was + both error prone and misleading. An alternative to achieve the + same goal in a safer and more intuitive way has been added, as + the trailer.<token>.cmd configuration variable, to replace it. + + * "git add -i --dry-run" does not dry-run, which was surprising. The + combination of options has taught to error out. + + * "git push" learns to discover common ancestor with the receiving + end over protocol v2. This will hopefully make "git push" as + efficient as "git fetch" in avoiding objects from getting + transferred unnecessarily. + + * "git mailinfo" (hence "git am") learned the "--quoted-cr" option to + control how lines ending with CRLF wrapped in base64 or qp are + handled. + + +Performance, Internal Implementation, Development Support etc. + + * Rename detection rework continues. + + * GIT_TEST_FAIL_PREREQS is a mechanism to skip test pieces with + prerequisites to catch broken tests that depend on the side effects + of optional pieces, but did not work at all when negative + prerequisites were involved. + (merge 27d578d904 jk/fail-prereq-testfix later to maint). + + * "git diff-index" codepath has been taught to trust fsmonitor status + to reduce number of lstat() calls. + (merge 7e5aa13d2c nk/diff-index-fsmonitor later to maint). + + * Reorganize Makefile to allow building git.o and other essential + objects without extra stuff needed only for testing. + + * Preparatory API changes for parallel checkout. + + * A simple IPC interface gets introduced to build services like + fsmonitor on top. + + * Fsck API clean-up. + + * SECURITY.md that is facing individual contributors and end users + has been introduced. Also a procedure to follow when preparing + embargoed releases has been spelled out. + (merge 09420b7648 js/security-md later to maint). + + * Optimize "rev-list --use-bitmap-index --objects" corner case that + uses negative tags as the stopping points. + + * CMake update for vsbuild. + + * An on-disk reverse-index to map the in-pack location of an object + back to its object name across multiple packfiles is introduced. + + * Generate [ec]tags under $(QUIET_GEN). + + * Clean-up codepaths that implements "git send-email --validate" + option and improves the message from it. + + * The last remnant of gettext-poison has been removed. + + * The test framework has been taught to optionally turn the default + merge strategy to "ort" throughout the system where we use + three-way merges internally, like cherry-pick, rebase etc., + primarily to enhance its test coverage (the strategy has been + available as an explicit "-s ort" choice). + + * A bit of code clean-up and a lot of test clean-up around userdiff + area. + + * Handling of "promisor packs" that allows certain objects to be + missing and lazily retrievable has been optimized (a bit). + + * When packet_write() fails, we gave an extra error message + unnecessarily, which has been corrected. + + * The checkout machinery has been taught to perform the actual + write-out of the files in parallel when able. + + * Show errno in the trace output in the error codepath that calls + read_raw_ref method. + + * Effort to make the command line completion (in contrib/) safe with + "set -u" continues. + + * Tweak a few tests for "log --format=..." that show timestamps in + various formats. + + * The reflog expiry machinery has been taught to emit trace events. + + * Over-the-wire protocol learns a new request type to ask for object + sizes given a list of object names. + + +Fixes since v2.31 +----------------- + + * The fsmonitor interface read from its input without making sure + there is something to read from. This bug is new in 2.31 + timeframe. + + * The data structure used by fsmonitor interface was not properly + duplicated during an in-core merge, leading to use-after-free etc. + + * "git bisect" reimplemented more in C during 2.30 timeframe did not + take an annotated tag as a good/bad endpoint well. This regression + has been corrected. + + * Fix macros that can silently inject unintended null-statements. + + * CALLOC_ARRAY() macro replaces many uses of xcalloc(). + + * Update insn in Makefile comments to run fuzz-all target. + + * Fix a corner case bug in "git mv" on case insensitive systems, + which was introduced in 2.29 timeframe. + + * We had a code to diagnose and die cleanly when a required + clean/smudge filter is missing, but an assert before that + unnecessarily fired, hiding the end-user facing die() message. + (merge 6fab35f748 mt/cleanly-die-upon-missing-required-filter later to maint). + + * Update C code that sets a few configuration variables when a remote + is configured so that it spells configuration variable names in the + canonical camelCase. + (merge 0f1da600e6 ab/remote-write-config-in-camel-case later to maint). + + * A new configuration variable has been introduced to allow choosing + which version of the generation number gets used in the + commit-graph file. + (merge 702110aac6 ds/commit-graph-generation-config later to maint). + + * Perf test update to work better in secondary worktrees. + (merge 36e834abc1 jk/perf-in-worktrees later to maint). + + * Updates to memory allocation code around the use of pcre2 library. + (merge c1760352e0 ab/grep-pcre2-allocfix later to maint). + + * "git -c core.bare=false clone --bare ..." would have segfaulted, + which has been corrected. + (merge 75555676ad bc/clone-bare-with-conflicting-config later to maint). + + * When "git checkout" removes a path that does not exist in the + commit it is checking out, it wasn't careful enough not to follow + symbolic links, which has been corrected. + (merge fab78a0c3d mt/checkout-remove-nofollow later to maint). + + * A few option description strings started with capital letters, + which were corrected. + (merge 5ee90326dc cc/downcase-opt-help later to maint). + + * Plug or annotate remaining leaks that trigger while running the + very basic set of tests. + (merge 68ffe095a2 ah/plugleaks later to maint). + + * The hashwrite() API uses a buffering mechanism to avoid calling + write(2) too frequently. This logic has been refactored to be + easier to understand. + (merge ddaf1f62e3 ds/clarify-hashwrite later to maint). + + * "git cherry-pick/revert" with or without "--[no-]edit" did not spawn + the editor as expected (e.g. "revert --no-edit" after a conflict + still asked to edit the message), which has been corrected. + (merge 39edfd5cbc en/sequencer-edit-upon-conflict-fix later to maint). + + * "git daemon" has been tightened against systems that take backslash + as directory separator. + (merge 9a7f1ce8b7 rs/daemon-sanitize-dir-sep later to maint). + + * A NULL-dereference bug has been corrected in an error codepath in + "git for-each-ref", "git branch --list" etc. + (merge c685450880 jk/ref-filter-segfault-fix later to maint). + + * Streamline the codepath to fix the UTF-8 encoding issues in the + argv[] and the prefix on macOS. + (merge c7d0e61016 tb/precompose-prefix-simplify later to maint). + + * The command-line completion script (in contrib/) had a couple of + references that would have given a warning under the "-u" (nounset) + option. + (merge c5c0548d79 vs/completion-with-set-u later to maint). + + * When "git pack-objects" makes a literal copy of a part of existing + packfile using the reachability bitmaps, its update to the progress + meter was broken. + (merge 8e118e8490 jk/pack-objects-bitmap-progress-fix later to maint). + + * The dependencies for config-list.h and command-list.h were broken + when the former was split out of the latter, which has been + corrected. + (merge 56550ea718 sg/bugreport-fixes later to maint). + + * "git push --quiet --set-upstream" was not quiet when setting the + upstream branch configuration, which has been corrected. + (merge f3cce896a8 ow/push-quiet-set-upstream later to maint). + + * The prefetch task in "git maintenance" assumed that "git fetch" + from any remote would fetch all its local branches, which would + fetch too much if the user is interested in only a subset of + branches there. + (merge 32f67888d8 ds/maintenance-prefetch-fix later to maint). + + * Clarify that pathnames recorded in Git trees are most often (but + not necessarily) encoded in UTF-8. + (merge 9364bf465d ab/pathname-encoding-doc later to maint). + + * "git --config-env var=val cmd" weren't accepted (only + --config-env=var=val was). + (merge c331551ccf ps/config-env-option-with-separate-value later to maint). + + * When the reachability bitmap is in effect, the "do not lose + recently created objects and those that are reachable from them" + safety to protect us from races were disabled by mistake, which has + been corrected. + (merge 2ba582ba4c jk/prune-with-bitmap-fix later to maint). + + * Cygwin pathname handling fix. + (merge bccc37fdc7 ad/cygwin-no-backslashes-in-paths later to maint). + + * "git rebase --[no-]reschedule-failed-exec" did not work well with + its configuration variable, which has been corrected. + (merge e5b32bffd1 ab/rebase-no-reschedule-failed-exec later to maint). + + * Portability fix for command line completion script (in contrib/). + (merge f2acf763e2 si/zsh-complete-comment-fix later to maint). + + * "git repack -A -d" in a partial clone unnecessarily loosened + objects in promisor pack. + + * "git bisect skip" when custom words are used for new/old did not + work, which has been corrected. + + * A few variants of informational message "Already up-to-date" has + been rephrased. + (merge ad9322da03 js/merge-already-up-to-date-message-reword later to maint). + + * "git submodule update --quiet" did not propagate the quiet option + down to underlying "git fetch", which has been corrected. + (merge 62af4bdd42 nc/submodule-update-quiet later to maint). + + * Document that our test can use "local" keyword. + (merge a84fd3bcc6 jc/test-allows-local later to maint). + + * The word-diff mode has been taught to work better with a word + regexp that can match an empty string. + (merge 0324e8fc6b pw/word-diff-zero-width-matches later to maint). + + * "git p4" learned to find branch points more efficiently. + (merge 6b79818bfb jk/p4-locate-branch-point-optim later to maint). + + * When "git update-ref -d" removes a ref that is packed, it left + empty directories under $GIT_DIR/refs/ for + (merge 5f03e5126d wc/packed-ref-removal-cleanup later to maint). + + * "git clean" and "git ls-files -i" had confusion around working on + or showing ignored paths inside an ignored directory, which has + been corrected. + (merge b548f0f156 en/dir-traversal later to maint). + + * The handling of "%(push)" formatting element of "for-each-ref" and + friends was broken when the same codepath started handling + "%(push:<what>)", which has been corrected. + (merge 1e1c4c5eac zh/ref-filter-push-remote-fix later to maint). + + * The bash prompt script (in contrib/) did not work under "set -u". + (merge 5c0cbdb107 en/prompt-under-set-u later to maint). + + * The "chainlint" feature in the test framework is a handy way to + catch common mistakes in writing new tests, but tends to get + expensive. An knob to selectively disable it has been introduced + to help running tests that the developer has not modified. + (merge 2d86a96220 jk/test-chainlint-softer later to maint). + + * The "rev-parse" command did not diagnose the lack of argument to + "--path-format" option, which was introduced in v2.31 era, which + has been corrected. + (merge 99fc555188 wm/rev-parse-path-format-wo-arg later to maint). + + * Other code cleanup, docfix, build fix, etc. + (merge f451960708 dl/cat-file-doc-cleanup later to maint). + (merge 12604a8d0c sv/t9801-test-path-is-file-cleanup later to maint). + (merge ea7e63921c jr/doc-ignore-typofix later to maint). + (merge 23c781f173 ps/update-ref-trans-hook-doc later to maint). + (merge 42efa1231a jk/filter-branch-sha256 later to maint). + (merge 4c8e3dca6e tb/push-simple-uses-branch-merge-config later to maint). + (merge 6534d436a2 bs/asciidoctor-installation-hints later to maint). + (merge 47957485b3 ab/read-tree later to maint). + (merge 2be927f3d1 ab/diff-no-index-tests later to maint). + (merge 76593c09bb ab/detox-gettext-tests later to maint). + (merge 28e29ee38b jc/doc-format-patch-clarify later to maint). + (merge fc12b6fdde fm/user-manual-use-preface later to maint). + (merge dba94e3a85 cc/test-helper-bloom-usage-fix later to maint). + (merge 61a7660516 hn/reftable-tables-doc-update later to maint). + (merge 81ed96a9b2 jt/fetch-pack-request-fix later to maint). + (merge 151b6c2dd7 jc/doc-do-not-capitalize-clarification later to maint). + (merge 9160068ac6 js/access-nul-emulation-on-windows later to maint). + (merge 7a14acdbe6 po/diff-patch-doc later to maint). + (merge f91371b948 pw/patience-diff-clean-up later to maint). + (merge 3a7f0908b6 mt/clean-clean later to maint). + (merge d4e2d15a8b ab/streaming-simplify later to maint). + (merge 0e59f7ad67 ah/merge-ort-i18n later to maint). + (merge e6f68f62e0 ls/typofix later to maint). diff --git a/Documentation/RelNotes/2.33.0.txt b/Documentation/RelNotes/2.33.0.txt new file mode 100644 index 0000000000..893c18bfdd --- /dev/null +++ b/Documentation/RelNotes/2.33.0.txt @@ -0,0 +1,279 @@ +Git 2.33 Release Notes +====================== + +Updates since Git 2.32 +---------------------- + +UI, Workflows & Features + + * "git send-email" learned the "--sendmail-cmd" command line option + and the "sendemail.sendmailCmd" configuration variable, which is a + more sensible approach than the current way of repurposing the + "smtp-server" that is meant to name the server to instead name the + command to talk to the server. + + * The userdiff pattern for C# learned the token "record". + + * "git rev-list" learns to omit the "commit <object-name>" header + lines from the output with the `--no-commit-header` option. + + * "git worktree add --lock" learned to record why the worktree is + locked with a custom message. + + +Performance, Internal Implementation, Development Support etc. + + * The code to handle the "--format" option in "for-each-ref" and + friends made too many string comparisons on %(atom)s used in the + format string, which has been corrected by converting them into + enum when the format string is parsed. + + * Use the hashfile API in the codepath that writes the index file to + reduce code duplication. + + * Repeated rename detections in a sequence of mergy operations have + been optimized out for the 'ort' merge strategy. + + * Preliminary clean-up of tests before the main reftable changes + hits the codebase. + + * The backend for "diff -G/-S" has been updated to use pcre2 engine + when available. + + * Use ".DELETE_ON_ERROR" pseudo target to simplify our Makefile. + + * Code cleanup around struct_type_init() functions. + + * "git send-email" optimization. + + * GitHub Actions / CI update. + (merge 0dc787a9f2 js/ci-windows-update later to maint). + + * Object accesses in repositories with many alternate object store + have been optimized. + + * "git log" has been optimized not to waste cycles to load ref + decoration data that may not be needed. + + * Many "printf"-like helper functions we have have been annotated + with __attribute__() to catch placeholder/parameter mismatches. + + * Tests that cover protocol bits have been updated and helpers + used there have been consolidated. + + * The CI gained a new job to run "make sparse" check. + + * "git status" codepath learned to work with sparsely populated index + without hydrating it fully. + + * A guideline for gender neutral documentation has been added. + + * Documentation on "git diff -l<n>" and diff.renameLimit have been + updated, and the defaults for these limits have been raised. + + * The completion support used to offer alternate spelling of options + that exist only for compatibility, which has been corrected. + + * "TEST_OUTPUT_DIRECTORY=there make test" failed to work, which has + been corrected. + + * "git bundle" gained more test coverage. + + * "git read-tree" had a codepath where blobs are fetched one-by-one + from the promisor remote, which has been corrected to fetch in bulk. + + * Rewrite of "git submodule" in C continues. + + * "git checkout" and "git commit" learn to work without unnecessarily + expanding sparse indexes. + + +Fixes since v2.32 +----------------- + + * We historically rejected a very short string as an author name + while accepting a patch e-mail, which has been loosened. + (merge 72ee47ceeb ef/mailinfo-short-name later to maint). + + * The parallel checkout codepath did not initialize object ID field + used to talk to the worker processes in a futureproof way. + + * Rewrite code that triggers undefined behaviour warning. + (merge aafa5df0df jn/size-t-casted-to-off-t-fix later to maint). + + * The description of "fast-forward" in the glossary has been updated. + (merge e22f2daed0 ry/clarify-fast-forward-in-glossary later to maint). + + * Recent "git clone" left a temporary directory behind when the + transport layer returned an failure. + (merge 6aacb7d861 jk/clone-clean-upon-transport-error later to maint). + + * "git fetch" over protocol v2 left its side of the socket open after + it finished speaking, which unnecessarily wasted the resource on + the other side. + (merge ae1a7eefff jk/fetch-pack-v2-half-close-early later to maint). + + * The command line completion (in contrib/) learned that "git diff" + takes the "--anchored" option. + (merge d1e7c2cac9 tb/complete-diff-anchored later to maint). + + * "git-svn" tests assumed that "locale -a", which is used to pick an + available UTF-8 locale, is available everywhere. A knob has been + introduced to allow testers to specify a suitable locale to use. + (merge 482c962de4 dd/svn-test-wo-locale-a later to maint). + + * Update "git subtree" to work better on Windows. + (merge 77f37de39f js/subtree-on-windows-fix later to maint). + + * Remove multimail from contrib/ + (merge f74d11471f js/no-more-multimail later to maint). + + * Make the codebase MSAN clean. + (merge 4dbc55e87d ah/uninitialized-reads-fix later to maint). + + * Work around inefficient glob substitution in older versions of bash + by rewriting parts of a test. + (merge eb87c6f559 jx/t6020-with-older-bash later to maint). + + * Avoid duplicated work while building reachability bitmaps. + (merge aa9ad6fee5 jk/bitmap-tree-optim later to maint). + + * We broke "GIT_SKIP_TESTS=t?000" to skip certain tests in recent + update, which got fixed. + + * The side-band demultiplexer that is used to display progress output + from the remote end did not clear the line properly when the end of + line hits at a packet boundary, which has been corrected. + + * Some test scripts assumed that readlink(1) was universally + installed and available, which is not the case. + (merge 7c0afdf23c jk/test-without-readlink-1 later to maint). + + * Recent update to completion script (in contrib/) broke those who + use the __git_complete helper to define completion to their custom + command. + (merge cea232194d fw/complete-cmd-idx-fix later to maint). + + * Output from some of our tests were affected by the width of the + terminal that they were run in, which has been corrected by + exporting a fixed value in the COLUMNS environment. + (merge c49a177bec ab/fix-columns-to-80-during-tests later to maint). + + * On Windows, mergetool has been taught to find kdiff3.exe just like + it finds winmerge.exe. + (merge 47eb4c6890 ms/mergetools-kdiff3-on-windows later to maint). + + * When we cannot figure out how wide the terminal is, we use a + fallback value of 80 ourselves (which cannot be avoided), but when + we run the pager, we export it in COLUMNS, which forces the pager + to use the hardcoded value, even when the pager is perfectly + capable to figure it out itself. Stop exporting COLUMNS when we + fall back on the hardcoded default value for our own use. + (merge 9b6e2c8b98 js/stop-exporting-bogus-columns later to maint). + + * "git cat-file --batch-all-objects"" misbehaved when "--batch" is in + use and did not ask for certain object traits. + (merge ee02ac6164 zh/cat-file-batch-fix later to maint). + + * Some code and doc clarification around "git push". + + * The "union" conflict resultion variant misbehaved when used with + binary merge driver. + (merge 382b601acd jk/union-merge-binary later to maint). + + * Prevent "git p4" from failing to submit changes to binary file. + (merge 54662d5958 dc/p4-binary-submit-fix later to maint). + + * "git grep --and -e foo" ought to have been diagnosed as an error + but instead segfaulted, which has been corrected. + (merge fe7fe62d8d rs/grep-parser-fix later to maint). + + * The merge code had funny interactions between content based rename + detection and directory rename detection. + (merge 3585d0ea23 en/merge-dir-rename-corner-case-fix later to maint). + + * When rebuilding the multi-pack index file reusing an existing one, + we used to blindly trust the existing file and ended up carrying + corrupted data into the updated file, which has been corrected. + (merge f89ecf7988 tb/midx-use-checksum later to maint). + + * Update the location of system-side configuration file on Windows. + (merge e355307692 js/gfw-system-config-loc-fix later to maint). + + * Code recently added to support common ancestry negotiation during + "git push" did not sanity check its arguments carefully enough. + (merge eff40457a4 ab/fetch-negotiate-segv-fix later to maint). + + * Update the documentation not to assume users are of certain gender + and adds to guidelines to do so. + (merge 46a237f42f ds/gender-neutral-doc later to maint). + + * "git commit --allow-empty-message" won't abort the operation upon + an empty message, but the hint shown in the editor said otherwise. + (merge 6f70f00b4f hj/commit-allow-empty-message later to maint). + + * The code that gives an error message in "git multi-pack-index" when + no subcommand is given tried to print a NULL pointer as a strong, + which has been corrected. + (merge 88617d11f9 tb/reverse-midx later to maint). + + * CI update. + (merge a066a90db6 js/ci-check-whitespace-updates later to maint). + + * Documentation fix for "git pull --rebase=no". + (merge d3236becec fc/pull-no-rebase-merges-theirs-into-ours later to maint). + + * A race between repacking and using pack bitmaps has been corrected. + (merge dc1daacdcc jk/check-pack-valid-before-opening-bitmap later to maint). + + * The local changes stashed by "git merge --autostash" were lost when + the merge failed in certain ways, which has been corrected. + + * Windows rmdir() equivalent behaves differently from POSIX ones in + that when used on a symbolic link that points at a directory, the + target directory gets removed, which has been corrected. + (merge 3e7d4888e5 tb/mingw-rmdir-symlink-to-directory later to maint). + + * Other code cleanup, docfix, build fix, etc. + (merge bfe35a6165 ah/doc-describe later to maint). + (merge f302c1e4aa jc/clarify-revision-range later to maint). + (merge 3127ff90ea tl/fix-packfile-uri-doc later to maint). + (merge a84216c684 jk/doc-color-pager later to maint). + (merge 4e0a64a713 ab/trace2-squelch-gcc-warning later to maint). + (merge 225f7fa847 ps/rev-list-object-type-filter later to maint). + (merge 5317dfeaed dd/honor-users-tar-in-tests later to maint). + (merge ace6d8e3d6 tk/partial-clone-repack-doc later to maint). + (merge 7ba68e0cf1 js/trace2-discard-event-docfix later to maint). + (merge 8603c419d3 fc/doc-default-to-upstream-config later to maint). + (merge 1d72b604ef jk/revision-squelch-gcc-warning later to maint). + (merge abcb66c614 ar/typofix later to maint). + (merge 9853830787 ah/graph-typofix later to maint). + (merge aac578492d ab/config-hooks-path-testfix later to maint). + (merge 98c7656a18 ar/more-typofix later to maint). + (merge 6fb9195f6c jk/doc-max-pack-size later to maint). + (merge 4184cbd635 ar/mailinfo-memcmp-to-skip-prefix later to maint). + (merge 91d2347033 ar/doc-libera-chat-in-my-first-contrib later to maint). + (merge 338abb0f04 ab/cmd-foo-should-return later to maint). + (merge 546096a5cb ab/xdiff-bug-cleanup later to maint). + (merge b7b793d1e7 ab/progress-cleanup later to maint). + (merge d94f9b8e90 ba/object-info later to maint). + (merge 52ff891c03 ar/test-code-cleanup later to maint). + (merge a0538e5c8b dd/document-log-decorate-default later to maint). + (merge ce24797d38 mr/cmake later to maint). + (merge 9eb542f2ee ab/pre-auto-gc-hook-test later to maint). + (merge 9fffc38583 bk/doc-commit-typofix later to maint). + (merge 1cf823d8f0 ks/submodule-cleanup later to maint). + (merge ebbf5d2b70 js/config-mak-windows-pcre-fix later to maint). + (merge 617480d75b hn/refs-iterator-peel-returns-boolean later to maint). + (merge 6a24cc71ed ar/submodule-helper-include-cleanup later to maint). + (merge 5632e838f8 rs/khash-alloc-cleanup later to maint). + (merge b1d87fbaf1 jk/typofix later to maint). + (merge e04170697a ab/gitignore-discovery-doc later to maint). + (merge 8232a0ff48 dl/packet-read-response-end-fix later to maint). + (merge eb448631fb dl/diff-merge-base later to maint). + (merge c510928a25 hn/refs-debug-empty-prefix later to maint). + (merge ddcb189d9d tb/bitmap-type-filter-comment-fix later to maint). + (merge 878b399734 pb/submodule-recurse-doc later to maint). + (merge 734283855f jk/config-env-doc later to maint). + (merge 482e1488a9 ab/getcwd-test later to maint). + (merge f0b922473e ar/doc-markup-fix later to maint). diff --git a/Documentation/RelNotes/2.34.0.txt b/Documentation/RelNotes/2.34.0.txt new file mode 100644 index 0000000000..7b03e625d0 --- /dev/null +++ b/Documentation/RelNotes/2.34.0.txt @@ -0,0 +1,344 @@ +Git 2.34 Release Notes +====================== + +Updates since Git 2.33 +---------------------- + +UI, Workflows & Features + + * Pathname expansion (like "~username/") learned a way to specify a + location relative to Git installation (e.g. its $sharedir which is + $(prefix)/share), with "%(prefix)". + + * Use `ort` instead of `recursive` as the default merge strategy. + + * The userdiff pattern for "java" language has been updated. + + * "git rebase" by default skips changes that are equivalent to + commits that are already in the history the branch is rebased onto; + give messages when this happens to let the users be aware of + skipped commits, and also teach them how to tell "rebase" to keep + duplicated changes. + + * The advice message that "git cherry-pick" gives when it asks + conflicted replay of a commit to be resolved by the end user has + been updated. + + * After "git clone --recurse-submodules", all submodules are cloned + but they are not by default recursed into by other commands. With + submodule.stickyRecursiveClone configuration set, submodule.recurse + configuration is set to true in a repository created by "clone" + with "--recurse-submodules" option. + + * The logic for auto-correction of misspelt subcommands learned to go + interactive when the help.autocorrect configuration variable is set + to 'prompt'. + + * "git maintenance" scheduler learned to use systemd timers as a + possible backend. + + * "git diff --submodule=diff" showed failure from run_command() when + trying to run diff inside a submodule, when the user manually + removes the submodule directory. + + * "git bundle unbundle" learned to show progress display. + + * In cone mode, the sparse-index code path learned to remove ignored + files (like build artifacts) outside the sparse cone, allowing the + entire directory outside the sparse cone to be removed, which is + especially useful when the sparse patterns change. + + * Taking advantage of the CGI interface, http-backend has been + updated to enable protocol v2 automatically when the other side + asks for it. + + * The credential-cache helper has been adjusted to Windows. + + * The error in "git help no-such-git-command" is handled better. + + * The unicode character width table (used for output alignment) has + been updated. + + +Performance, Internal Implementation, Development Support etc. + + * "git bisect" spawned "git show-branch" only to pretty-print the + title of the commit after checking out the next version to be + tested; this has been rewritten in C. + + * "git add" can work better with the sparse index. + + * Support for ancient versions of cURL library (pre 7.19.4) has been + dropped. + + * A handful of tests that assumed implementation details of files + backend for refs have been cleaned up. + + * trace2 logs learned to show parent process name to see in what + context Git was invoked. + + * Loading of ref tips to prepare for common ancestry negotiation in + "git fetch-pack" has been optimized by taking advantage of the + commit graph when available. + + * Remind developers that the userdiff patterns should be kept simple + and permissive, assuming that the contents they apply are always + syntactically correct. + + * The current implementation of GIT_TEST_FAIL_PREREQS is broken in + that checking for the lack of a prerequisite would not work. Avoid + the use of "if ! test_have_prereq X" in a test script. + + * The revision traversal API has been optimized by taking advantage + of the commit-graph, when available, to determine if a commit is + reachable from any of the existing refs. + + * "git fetch --quiet" optimization to avoid useless computation of + info that will never be displayed. + + * Callers from older advice_config[] based API has been updated to + use the newer advice_if_enabled() and advice_enabled() API. + + * Teach "test_pause" and "debug" helpers to allow using the HOME and + TERM environment variables the user usually uses. + + * "make INSTALL_STRIP=-s install" allows the installation step to use + "install -s" to strip the binaries as they get installed. + + * Code that handles large number of refs in the "git fetch" code + path has been optimized. + + * The reachability bitmap file used to be generated only for a single + pack, but now we've learned to generate bitmaps for history that + span across multiple packfiles. + + * The code to make "git grep" recurse into submodules has been + updated to migrate away from the "add submodule's object store as + an alternate object store" mechanism (which is suboptimal). + + * The tracing of process ancestry information has been enhanced. + + * Reduce number of write(2) system calls while sending the + ref advertisement. + + * Update the build procedure to use the "-pedantic" build when + DEVELOPER makefile macro is in effect. + + * Large part of "git submodule add" gets rewritten in C. + + * The run-command API has been updated so that the callers can easily + ask the file descriptors open for packfiles to be closed immediately + before spawning commands that may trigger auto-gc. + + * An oddball OPTION_ARGUMENT feature has been removed from the + parse-options API. + + +Fixes since v2.33 +----------------- + + * Input validation of "git pack-objects --stdin-packs" has been + corrected. + (merge 561fa03529 ab/pack-stdin-packs-fix later to maint). + + * Bugfix for common ancestor negotiation recently introduced in "git + push" code path. + (merge 82823118b9 jt/push-negotiation-fixes later to maint). + + * "git pull" had various corner cases that were not well thought out + around its --rebase backend, e.g. "git pull --ff-only" did not stop + but went ahead and rebased when the history on other side is not a + descendant of our history. The series tries to fix them up. + (merge 6f843a3355 en/pull-conflicting-options later to maint). + + * "git apply" miscounted the bytes and failed to read to the end of + binary hunks. + (merge 46d723ce57 jk/apply-binary-hunk-parsing-fix later to maint). + + * "git range-diff" code clean-up. + (merge c4d5907324 jk/range-diff-fixes later to maint). + + * "git commit --fixup" now works with "--edit" again, after it was + broken in v2.32. + (merge 8ef6aad664 jk/commit-edit-fixup-fix later to maint). + + * Use upload-artifacts v1 (instead of v2) for 32-bit linux, as the + new version has a blocker bug for that architecture. + (merge 3cf9bb36bf cb/ci-use-upload-artifacts-v1 later to maint). + + * Checking out all the paths from HEAD during the last conflicted + step in "git rebase" and continuing would cause the step to be + skipped (which is expected), but leaves MERGE_MSG file behind in + $GIT_DIR and confuses the next "git commit", which has been + corrected. + (merge e5ee33e855 pw/rebase-skip-final-fix later to maint). + + * Various bugs in "git rebase -r" have been fixed. + (merge f2563c9ef3 pw/rebase-r-fixes later to maint). + + * mmap() imitation used to call xmalloc() that dies upon malloc() + failure, which has been corrected to just return an error to the + caller to be handled. + (merge 95b4ff3931 rs/git-mmap-uses-malloc later to maint). + + * "git diff --relative" segfaulted and/or produced incorrect result + when there are unmerged paths. + (merge 8174627b3d dd/diff-files-unmerged-fix later to maint). + + * The delayed checkout code path in "git checkout" etc. were chatty + even when --quiet and/or --no-progress options were given. + (merge 7a132c628e mt/quiet-with-delayed-checkout later to maint). + + * "git branch -D <branch>" used to refuse to remove a broken branch + ref that points at a missing commit, which has been corrected. + (merge 597a977489 rs/branch-allow-deleting-dangling later to maint). + + * Build update for Apple clang. + (merge f32c5d3716 cb/makefile-apple-clang later to maint). + + * The parser for the "--nl" option of "git column" has been + corrected. + (merge c93ca46cf5 sg/column-nl later to maint). + + * "git upload-pack" which runs on the other side of "git fetch" + forgot to take the ref namespaces into account when handling + want-ref requests. + (merge 53a66ec37c ka/want-ref-in-namespace later to maint). + + * The sparse-index support can corrupt the index structure by storing + a stale and/or uninitialized data, which has been corrected. + (merge d9e9b44d7a jh/sparse-index-resize-fix later to maint). + + * Buggy tests could damage repositories outside the throw-away test + area we created. We now by default export GIT_CEILING_DIRECTORIES + to limit the damage from such a stray test. + (merge 614c3d8f2e sg/set-ceiling-during-tests later to maint). + + * Even when running "git send-email" without its own threaded + discussion support, a threading related header in one message is + carried over to the subsequent message to result in an unwanted + threading, which has been corrected. + (merge e082113484 mh/send-email-reset-in-reply-to later to maint). + + * The output from "git fast-export", when its anonymization feature + is in use, showed an annotated tag incorrectly. + (merge 2f040a9671 tk/fast-export-anonymized-tag-fix later to maint). + + * Doc update plus improved error reporting. + (merge 1e93770888 jk/log-warn-on-bogus-encoding later to maint). + + * Recent "diff -m" changes broke "gitk", which has been corrected. + (merge 5acffd3473 so/diff-index-regression-fix later to maint). + + * Regression fix. + (merge b996f84989 ab/send-email-config-fix later to maint). + + * The "git apply -3" code path learned not to bother the lower level + merge machinery when the three-way merge can be trivially resolved + without the content level merge. This fixes a regression caused by + recent "-3way first and fall back to direct application" change. + (merge 57f183b698 jc/trivial-threeway-binary-merge later to maint). + + * The code that optionally creates the *.rev reverse index file has + been optimized to avoid needless computation when it is not writing + the file out. + (merge 8fe8bae9d2 ab/reverse-midx-optim later to maint). + + * "git range-diff -I... <range> <range>" segfaulted, which has been + corrected. + (merge 709b3f32d3 rs/range-diff-avoid-segfault-with-I later to maint). + + * The order in which various files that make up a single (conceptual) + packfile has been reevaluated and straightened up. This matters in + correctness, as an incomplete set of files must not be shown to a + running Git. + (merge 4bc1fd6e39 tb/pack-finalize-ordering later to maint). + + * The "mode" word is useless in a call to open(2) that does not + create a new file. Such a call in the files backend of the ref + subsystem has been cleaned up. + (merge 35cf94eaf6 rs/no-mode-to-open-when-appending later to maint). + + * "git update-ref --stdin" failed to flush its output as needed, + which potentially led the conversation to a deadlock. + (merge 7c1200745b ps/update-ref-batch-flush later to maint). + + * When "git am --abort" fails to abort correctly, it still exited + with exit status of 0, which has been corrected. + (merge c5ead19ea2 en/am-abort-fix later to maint). + + * Correct nr and alloc members of strvec struct to be of type size_t. + (merge 8d133a4653 jk/strvec-typefix later to maint). + + * "git stash", where the tentative change involves changing a + directory to a file (or vice versa), was confused, which has been + corrected. + (merge bee8691f19 en/stash-df-fix later to maint). + + * "git clone" from a repository whose HEAD is unborn into a bare + repository didn't follow the branch name the other side used, which + is corrected. + (merge 6b58df54cf jk/clone-unborn-head-in-bare later to maint). + + * "git cvsserver" had a long-standing bug in its authentication code, + which has finally been corrected (it is unclear and is a separate + question if anybody is seriously using it, though). + (merge 4b81f690f6 cb/cvsserver later to maint). + + * "git difftool --dir-diff" mishandled symbolic links. + (merge 5bafb3576a da/difftool-dir-diff-symlink-fix later to maint). + + * Sensitive data in the HTTP trace were supposed to be redacted, but + we failed to do so in HTTP/2 requests. + (merge b66c77a64e jk/http-redact-fix later to maint). + + * "make clean" has been updated to remove leftover .depend/ + directories, even when it is not told to use them to compute header + dependencies. + (merge f0a74bcb03 ab/make-clean-depend-dirs later to maint). + + * Protocol v0 clients can get stuck parsing a malformed feature line. + (merge 44d2aec6e8 ah/connect-parse-feature-v0-fix later to maint). + + * Other code cleanup, docfix, build fix, etc. + (merge 1d9c8daef8 ab/bundle-doc later to maint). + (merge 81483fe613 en/merge-strategy-docs later to maint). + (merge 626beebdf8 js/log-protocol-version later to maint). + (merge 00e302da76 cb/builtin-merge-format-string-fix later to maint). + (merge ad51ae4dc0 cb/ci-freebsd-update later to maint). + (merge be6444d1ca fc/completion-updates later to maint). + (merge ff7b83f562 ti/tcsh-completion-regression-fix later to maint). + (merge 325b06deda sg/make-fix-ar-invocation later to maint). + (merge bd72824c60 me/t5582-cleanup later to maint). + (merge f6a5af0f62 ga/send-email-sendmail-cmd later to maint). + (merge f58c7468cd ab/ls-remote-packet-trace later to maint). + (merge 0160f7e725 ab/rebase-fatal-fatal-fix later to maint). + (merge a16eb6b1ff js/maintenance-launchctl-fix later to maint). + (merge c21b2511c2 jk/t5323-no-pack-test-fix later to maint). + (merge 5146c2f148 mh/credential-leakfix later to maint). + (merge 1549577338 dd/t6300-wo-gpg-fix later to maint). + (merge 66e905b7dd rs/xopen-reports-open-failures later to maint). + (merge 469888e6a5 es/walken-tutorial-fix later to maint). + (merge 88682b016d ba/object-info later to maint). + (merge b45c172e51 ab/gc-log-rephrase later to maint). + (merge ccdd5d1eb1 ab/mailmap-leakfix later to maint). + (merge 6540b71614 cb/remote-ndebug-fix later to maint). + (merge e4f8d27585 rs/show-branch-simplify later to maint). + (merge e124ecf7f7 rs/archive-use-object-id later to maint). + (merge cebead1ebf cb/ci-build-pedantic later to maint). + (merge ca0cc98e03 bs/doc-bugreport-outdir later to maint). + (merge 72b113e562 ab/no-more-check-bindir later to maint). + (merge 92a5d1c9b4 jc/prefix-filename-allocates later to maint). + (merge d9a65b6c0a rs/setup-use-xopen-and-xdup later to maint). + (merge e8f55568de jk/t5562-racefix later to maint). + (merge 8f0f110156 rs/drop-core-compression-vars later to maint). + (merge b6d8887d3d ma/doc-git-version later to maint). + (merge 66c0c44df6 cb/plug-leaks-in-alloca-emu-users later to maint). + (merge afb32e8101 kz/revindex-comment-fix later to maint). + (merge ae578de926 po/git-config-doc-mentions-help-c later to maint). + (merge 187fc8b8b6 cb/unicode-14 later to maint). + (merge 3584cff71c en/typofixes later to maint). + (merge f188160be9 ab/bundle-remove-verbose-option later to maint). + (merge 8c6b4332b4 rs/close-pack-leakfix later to maint). + (merge 51b04c05b7 bs/difftool-msg-tweak later to maint). + (merge dd20e4a6db ab/make-compdb-fix later to maint). diff --git a/Documentation/RelNotes/2.8.0.txt b/Documentation/RelNotes/2.8.0.txt index 27320b6a9f..38453281b8 100644 --- a/Documentation/RelNotes/2.8.0.txt +++ b/Documentation/RelNotes/2.8.0.txt @@ -377,7 +377,7 @@ notes for details). on that order. * "git show 'HEAD:Foo[BAR]Baz'" did not interpret the argument as a - rev, i.e. the object named by the the pathname with wildcard + rev, i.e. the object named by the pathname with wildcard characters in a tree object. (merge aac4fac nd/dwim-wildcards-as-pathspecs later to maint). diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 0452db2e67..e409022d93 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -74,10 +74,9 @@ the feature triggers the new behavior when it should, and to show the feature does not trigger when it shouldn't. After any code change, make sure that the entire test suite passes. -If you have an account at GitHub (and you can get one for free to work -on open source projects), you can use their Travis CI integration to -test your changes on Linux, Mac (and hopefully soon Windows). See -GitHub-Travis CI hints section for details. +Pushing to a fork of https://github.com/git/git will use their CI +integration to test your changes on Linux, Mac and Windows. See the +<<GHCI,GitHub CI>> section for details. Do not forget to update the documentation to describe the updated behavior and make sure that the resulting documentation set formats @@ -117,10 +116,13 @@ If in doubt which identifier to use, run `git log --no-merges` on the files you are modifying to see the current conventions. [[summary-section]] -It's customary to start the remainder of the first line after "area: " -with a lower-case letter. E.g. "doc: clarify...", not "doc: -Clarify...", or "githooks.txt: improve...", not "githooks.txt: -Improve...". +The title sentence after the "area:" prefix omits the full stop at the +end, and its first word is not capitalized unless there is a reason to +capitalize it other than because it is the first word in the sentence. +E.g. "doc: clarify...", not "doc: Clarify...", or "githooks.txt: +improve...", not "githooks.txt: Improve...". But "refs: HEAD is also +treated as a ref" is correct, as we spell `HEAD` in all caps even when +it appears in the middle of a sentence. [[meaningful-message]] The body should provide a meaningful commit message, which: @@ -164,6 +166,85 @@ or, on an older version of Git without support for --pretty=reference: git show -s --date=short --pretty='format:%h (%s, %ad)' <commit> .... +[[sign-off]] +=== Certify your work by adding your `Signed-off-by` trailer + +To improve tracking of who did what, we ask you to certify that you +wrote the patch or have the right to pass it on under the same license +as ours, by "signing off" your patch. Without sign-off, we cannot +accept your patches. + +If (and only if) you certify the below D-C-O: + +[[dco]] +.Developer's Certificate of Origin 1.1 +____ +By making a contribution to this project, I certify that: + +a. The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +b. The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +c. The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +d. I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +____ + +you add a "Signed-off-by" trailer to your commit, that looks like +this: + +.... + Signed-off-by: Random J Developer <random@developer.example.org> +.... + +This line can be added by Git if you run the git-commit command with +the -s option. + +Notice that you can place your own `Signed-off-by` trailer when +forwarding somebody else's patch with the above rules for +D-C-O. Indeed you are encouraged to do so. Do not forget to +place an in-body "From: " line at the beginning to properly attribute +the change to its true author (see (2) above). + +This procedure originally came from the Linux kernel project, so our +rule is quite similar to theirs, but what exactly it means to sign-off +your patch differs from project to project, so it may be different +from that of the project you are accustomed to. + +[[real-name]] +Also notice that a real name is used in the `Signed-off-by` trailer. Please +don't hide your real name. + +[[commit-trailers]] +If you like, you can put extra tags at the end: + +. `Reported-by:` is used to credit someone who found the bug that + the patch attempts to fix. +. `Acked-by:` says that the person who is more familiar with the area + the patch attempts to modify liked the patch. +. `Reviewed-by:`, unlike the other tags, can only be offered by the + reviewers themselves when they are completely satisfied with the + patch after a detailed analysis. +. `Tested-by:` is used to indicate that the person applied the patch + and found it to have the desired effect. + +You can also create your own tag or use one that's in common usage +such as "Thanks-to:", "Based-on-patch-by:", or "Mentored-by:". + [[git-tools]] === Generate your patch using Git tools out of your commits. @@ -299,86 +380,6 @@ Do not forget to add trailers such as `Acked-by:`, `Reviewed-by:` and `Tested-by:` lines as necessary to credit people who helped your patch, and "cc:" them when sending such a final version for inclusion. -[[sign-off]] -=== Certify your work by adding your `Signed-off-by` trailer - -To improve tracking of who did what, we ask you to certify that you -wrote the patch or have the right to pass it on under the same license -as ours, by "signing off" your patch. Without sign-off, we cannot -accept your patches. - -If (and only if) you certify the below D-C-O: - -[[dco]] -.Developer's Certificate of Origin 1.1 -____ -By making a contribution to this project, I certify that: - -a. The contribution was created in whole or in part by me and I - have the right to submit it under the open source license - indicated in the file; or - -b. The contribution is based upon previous work that, to the best - of my knowledge, is covered under an appropriate open source - license and I have the right under that license to submit that - work with modifications, whether created in whole or in part - by me, under the same open source license (unless I am - permitted to submit under a different license), as indicated - in the file; or - -c. The contribution was provided directly to me by some other - person who certified (a), (b) or (c) and I have not modified - it. - -d. I understand and agree that this project and the contribution - are public and that a record of the contribution (including all - personal information I submit with it, including my sign-off) is - maintained indefinitely and may be redistributed consistent with - this project or the open source license(s) involved. -____ - -you add a "Signed-off-by" trailer to your commit, that looks like -this: - -.... - Signed-off-by: Random J Developer <random@developer.example.org> -.... - -This line can be added by Git if you run the git-commit command with -the -s option. - -Notice that you can place your own `Signed-off-by` trailer when -forwarding somebody else's patch with the above rules for -D-C-O. Indeed you are encouraged to do so. Do not forget to -place an in-body "From: " line at the beginning to properly attribute -the change to its true author (see (2) above). - -This procedure originally came from the Linux kernel project, so our -rule is quite similar to theirs, but what exactly it means to sign-off -your patch differs from project to project, so it may be different -from that of the project you are accustomed to. - -[[real-name]] -Also notice that a real name is used in the `Signed-off-by` trailer. Please -don't hide your real name. - -[[commit-trailers]] -If you like, you can put extra tags at the end: - -. `Reported-by:` is used to credit someone who found the bug that - the patch attempts to fix. -. `Acked-by:` says that the person who is more familiar with the area - the patch attempts to modify liked the patch. -. `Reviewed-by:`, unlike the other tags, can only be offered by the - reviewer and means that she is completely satisfied that the patch - is ready for application. It is usually offered only after a - detailed review. -. `Tested-by:` is used to indicate that the person applied the patch - and found it to have the desired effect. - -You can also create your own tag or use one that's in common usage -such as "Thanks-to:", "Based-on-patch-by:", or "Mentored-by:". - == Subsystems with dedicated maintainers Some parts of the system have dedicated maintainers with their own @@ -447,13 +448,12 @@ their trees themselves. entitled "What's cooking in git.git" and "What's in git.git" giving the status of various proposed changes. -[[travis]] -== GitHub-Travis CI hints +== GitHub CI[[GHCI]]] -With an account at GitHub (you can get one for free to work on open -source projects), you can use Travis CI to test your changes on Linux, -Mac (and hopefully soon Windows). You can find a successful example -test build here: https://travis-ci.org/git/git/builds/120473209 +With an account at GitHub, you can use GitHub CI to test your changes +on Linux, Mac and Windows. See +https://github.com/git/git/actions/workflows/main.yml for examples of +recent CI runs. Follow these steps for the initial setup: @@ -461,31 +461,18 @@ Follow these steps for the initial setup: You can find detailed instructions how to fork here: https://help.github.com/articles/fork-a-repo/ -. Open the Travis CI website: https://travis-ci.org - -. Press the "Sign in with GitHub" button. - -. Grant Travis CI permissions to access your GitHub account. - You can find more information about the required permissions here: - https://docs.travis-ci.com/user/github-oauth-scopes - -. Open your Travis CI profile page: https://travis-ci.org/profile - -. Enable Travis CI builds for your Git fork. - -After the initial setup, Travis CI will run whenever you push new changes +After the initial setup, CI will run whenever you push new changes to your fork of Git on GitHub. You can monitor the test state of all your -branches here: https://travis-ci.org/__<Your GitHub handle>__/git/branches +branches here: https://github.com/<Your GitHub handle>/git/actions/workflows/main.yml If a branch did not pass all test cases then it is marked with a red -cross. In that case you can click on the failing Travis CI job and -scroll all the way down in the log. Find the line "<-- Click here to see -detailed test output!" and click on the triangle next to the log line -number to expand the detailed test output. Here is such a failing -example: https://travis-ci.org/git/git/jobs/122676187 - -Fix the problem and push your fix to your Git fork. This will trigger -a new Travis CI build to ensure all tests pass. +cross. In that case you can click on the failing job and navigate to +"ci/run-build-and-tests.sh" and/or "ci/print-test-failures.sh". You +can also download "Artifacts" which are tarred (or zipped) archives +with test data relevant for debugging. + +Then fix the problem and push your fix to your GitHub fork. This will +trigger a new CI build to ensure all tests pass. [[mua]] == MUA specific hints diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt index dc3bceb6d1..117f4cf806 100644 --- a/Documentation/blame-options.txt +++ b/Documentation/blame-options.txt @@ -1,6 +1,6 @@ -b:: Show blank SHA-1 for boundary commits. This can also - be controlled via the `blame.blankboundary` config option. + be controlled via the `blame.blankBoundary` config option. --root:: Do not treat root commits as boundaries. This can also be diff --git a/Documentation/config.txt b/Documentation/config.txt index 6ba50b1104..0c0e6b859f 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -46,7 +46,7 @@ Subsection names are case sensitive and can contain any characters except newline and the null byte. Doublequote `"` and backslash can be included by escaping them as `\"` and `\\`, respectively. Backslashes preceding other characters are dropped when reading; for example, `\t` is read as -`t` and `\0` is read as `0` Section headers cannot span multiple lines. +`t` and `\0` is read as `0`. Section headers cannot span multiple lines. Variables may belong directly to a section or to a given subsection. You can have `[section]` if you have `[section "subsection"]`, but you don't need to. @@ -298,6 +298,15 @@ pathname:: tilde expansion happens to such a string: `~/` is expanded to the value of `$HOME`, and `~user/` to the specified user's home directory. ++ +If a path starts with `%(prefix)/`, the remainder is interpreted as a +path relative to Git's "runtime prefix", i.e. relative to the location +where Git itself was installed. For example, `%(prefix)/bin/` refers to +the directory in which the Git executable itself lives. If Git was +compiled without runtime prefix support, the compiled-in prefix will be +subsituted instead. In the unlikely event that a literal path needs to +be specified that should _not_ be expanded, it needs to be prefixed by +`./`, like so: `./%(prefix)/bin`. Variables @@ -398,6 +407,8 @@ include::config/interactive.txt[] include::config/log.txt[] +include::config/lsrefs.txt[] + include::config/mailinfo.txt[] include::config/mailmap.txt[] diff --git a/Documentation/config/advice.txt b/Documentation/config/advice.txt index acbd0c09aa..063eec2511 100644 --- a/Documentation/config/advice.txt +++ b/Documentation/config/advice.txt @@ -44,6 +44,9 @@ advice.*:: Shown when linkgit:git-push[1] rejects a forced update of a branch when its remote-tracking ref has updates that we do not have locally. + skippedCherryPicks:: + Shown when linkgit:git-rebase[1] skips a commit that has already + been cherry-picked onto the upstream branch. statusAheadBehind:: Shown when linkgit:git-status[1] computes the ahead/behind counts for a local ref compared to its remote tracking ref, @@ -119,4 +122,8 @@ advice.*:: addEmptyPathspec:: Advice shown if a user runs the add command without providing the pathspec parameter. + updateSparsePath:: + Advice shown when either linkgit:git-add[1] or linkgit:git-rm[1] + is asked to update index entries outside the current sparse + checkout. -- diff --git a/Documentation/config/blame.txt b/Documentation/config/blame.txt index 9468e8599c..4d047c1790 100644 --- a/Documentation/config/blame.txt +++ b/Documentation/config/blame.txt @@ -27,7 +27,7 @@ blame.ignoreRevsFile:: file names will reset the list of ignored revisions. This option will be handled before the command line option `--ignore-revs-file`. -blame.markUnblamables:: +blame.markUnblamableLines:: Mark lines that were changed by an ignored revision that we could not attribute to another commit with a '*' in the output of linkgit:git-blame[1]. diff --git a/Documentation/config/checkout.txt b/Documentation/config/checkout.txt index 2cddf7b4b4..bfbca90f0e 100644 --- a/Documentation/config/checkout.txt +++ b/Documentation/config/checkout.txt @@ -21,3 +21,24 @@ checkout.guess:: Provides the default value for the `--guess` or `--no-guess` option in `git checkout` and `git switch`. See linkgit:git-switch[1] and linkgit:git-checkout[1]. + +checkout.workers:: + The number of parallel workers to use when updating the working tree. + The default is one, i.e. sequential execution. If set to a value less + than one, Git will use as many workers as the number of logical cores + available. This setting and `checkout.thresholdForParallelism` affect + all commands that perform checkout. E.g. checkout, clone, reset, + sparse-checkout, etc. ++ +Note: parallel checkout usually delivers better performance for repositories +located on SSDs or over NFS. For repositories on spinning disks and/or machines +with a small number of cores, the default sequential checkout often performs +better. The size and compression level of a repository might also influence how +well the parallel version performs. + +checkout.thresholdForParallelism:: + When running parallel checkout with a small number of files, the cost + of subprocess spawning and inter-process communication might outweigh + the parallelization gains. This setting allows to define the minimum + number of files for which parallel checkout should be attempted. The + default is 100. diff --git a/Documentation/config/clone.txt b/Documentation/config/clone.txt index 47de36a5fe..7bcfbd18a5 100644 --- a/Documentation/config/clone.txt +++ b/Documentation/config/clone.txt @@ -2,3 +2,7 @@ clone.defaultRemoteName:: The name of the remote to create when cloning a repository. Defaults to `origin`, and can be overridden by passing the `--origin` command-line option to linkgit:git-clone[1]. + +clone.rejectShallow:: + Reject to clone a repository if it is a shallow one, can be overridden by + passing option `--reject-shallow` in command line. See linkgit:git-clone[1] diff --git a/Documentation/config/color.txt b/Documentation/config/color.txt index d5daacb13a..e05d520a86 100644 --- a/Documentation/config/color.txt +++ b/Documentation/config/color.txt @@ -127,8 +127,9 @@ color.interactive.<slot>:: interactive commands. color.pager:: - A boolean to enable/disable colored output when the pager is in - use (default is true). + A boolean to specify whether `auto` color modes should colorize + output going to the pager. Defaults to true; set this to false + if your pager does not understand ANSI color codes. color.push:: A boolean to enable/disable color in push errors. May be set to diff --git a/Documentation/config/commitgraph.txt b/Documentation/config/commitgraph.txt index 4582c39fc4..30604e4a4c 100644 --- a/Documentation/config/commitgraph.txt +++ b/Documentation/config/commitgraph.txt @@ -1,3 +1,9 @@ +commitGraph.generationVersion:: + Specifies the type of generation number version to use when writing + or reading the commit-graph file. If version 1 is specified, then + the corrected commit dates will not be written or read. Defaults to + 2. + commitGraph.maxNewFilters:: Specifies the default value for the `--max-new-filters` option of `git commit-graph write` (c.f., linkgit:git-commit-graph[1]). diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 160aacad84..c04f62a54a 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -625,4 +625,6 @@ core.abbrev:: computed based on the approximate number of packed objects in your repository, which hopefully is enough for abbreviated object names to stay unique for some time. + If set to "no", no abbreviation is made and the object names + are shown in their full length. The minimum length is 4. diff --git a/Documentation/config/diff.txt b/Documentation/config/diff.txt index c3ae136eba..32f84838ac 100644 --- a/Documentation/config/diff.txt +++ b/Documentation/config/diff.txt @@ -85,6 +85,8 @@ diff.ignoreSubmodules:: and 'git status' when `status.submoduleSummary` is set unless it is overridden by using the --ignore-submodules command-line option. The 'git submodule' commands are not affected by this setting. + By default this is set to untracked so that any untracked + submodules are ignored. diff.mnemonicPrefix:: If set, 'git diff' uses a prefix pair that is different from the @@ -116,9 +118,10 @@ diff.orderFile:: relative to the top of the working tree. diff.renameLimit:: - The number of files to consider when performing the copy/rename - detection; equivalent to the 'git diff' option `-l`. This setting - has no effect if rename detection is turned off. + The number of files to consider in the exhaustive portion of + copy/rename detection; equivalent to the 'git diff' option + `-l`. If not set, the default value is currently 1000. This + setting has no effect if rename detection is turned off. diff.renames:: Whether and how Git detects renames. If set to "false", diff --git a/Documentation/config/fetch.txt b/Documentation/config/fetch.txt index 6af6f5edb2..63748c02b7 100644 --- a/Documentation/config/fetch.txt +++ b/Documentation/config/fetch.txt @@ -69,7 +69,8 @@ fetch.negotiationAlgorithm:: setting defaults to "skipping". Unknown values will cause 'git fetch' to error out. + -See also the `--negotiation-tip` option for linkgit:git-fetch[1]. +See also the `--negotiate-only` and `--negotiation-tip` options to +linkgit:git-fetch[1]. fetch.showForcedUpdates:: Set to false to enable `--no-show-forced-updates` in diff --git a/Documentation/config/gui.txt b/Documentation/config/gui.txt index d30831a130..0c087fd8c9 100644 --- a/Documentation/config/gui.txt +++ b/Documentation/config/gui.txt @@ -11,7 +11,7 @@ gui.displayUntracked:: in the file list. The default is "true". gui.encoding:: - Specifies the default encoding to use for displaying of + Specifies the default character encoding to use for displaying of file contents in linkgit:git-gui[1] and linkgit:gitk[1]. It can be overridden by setting the 'encoding' attribute for relevant files (see linkgit:gitattributes[5]). diff --git a/Documentation/config/help.txt b/Documentation/config/help.txt index 783a90a0f9..610701f9a3 100644 --- a/Documentation/config/help.txt +++ b/Documentation/config/help.txt @@ -9,13 +9,15 @@ help.format:: help.autoCorrect:: If git detects typos and can identify exactly one valid command similar - to the error, git will automatically run the intended command after - waiting a duration of time defined by this configuration value in - deciseconds (0.1 sec). If this value is 0, the suggested corrections - will be shown, but not executed. If it is a negative integer, or - "immediate", the suggested command - is run immediately. If "never", suggestions are not shown at all. The - default value is zero. + to the error, git will try to suggest the correct command or even + run the suggestion automatically. Possible config values are: + - 0 (default): show the suggested command. + - positive number: run the suggested command after specified +deciseconds (0.1 sec). + - "immediate": run the suggested command immediately. + - "prompt": show the suggestion and prompt for confirmation to run +the command. + - "never": don't run or show any suggested command. help.htmlPath:: Specify the path where the HTML documentation resides. File system paths diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt index 7cb50b37e9..75f3a2d105 100644 --- a/Documentation/config/index.txt +++ b/Documentation/config/index.txt @@ -14,6 +14,11 @@ index.recordOffsetTable:: Defaults to 'true' if index.threads has been explicitly enabled, 'false' otherwise. +index.sparse:: + When enabled, write the index using sparse-directory entries. This + has no effect unless `core.sparseCheckout` and + `core.sparseCheckoutCone` are both enabled. Defaults to 'false'. + index.threads:: Specifies the number of threads to spawn when loading the index. This is meant to reduce index load time on multiprocessor machines. diff --git a/Documentation/config/init.txt b/Documentation/config/init.txt index dc77f8c844..79c79d6617 100644 --- a/Documentation/config/init.txt +++ b/Documentation/config/init.txt @@ -4,4 +4,4 @@ init.templateDir:: init.defaultBranch:: Allows overriding the default branch name e.g. when initializing - a new repository or when cloning an empty repository. + a new repository. diff --git a/Documentation/config/log.txt b/Documentation/config/log.txt index 208d5fdcaa..456eb07800 100644 --- a/Documentation/config/log.txt +++ b/Documentation/config/log.txt @@ -24,6 +24,11 @@ log.excludeDecoration:: the config option can be overridden by the `--decorate-refs` option. +log.diffMerges:: + Set default diff format to be used for merge commits. See + `--diff-merges` in linkgit:git-log[1] for details. + Defaults to `separate`. + log.follow:: If `true`, `git log` will act as if the `--follow` option was used when a single <path> is given. This has the same limitations as `--follow`, diff --git a/Documentation/config/lsrefs.txt b/Documentation/config/lsrefs.txt new file mode 100644 index 0000000000..adeda0f24d --- /dev/null +++ b/Documentation/config/lsrefs.txt @@ -0,0 +1,9 @@ +lsrefs.unborn:: + May be "advertise" (the default), "allow", or "ignore". If "advertise", + the server will respond to the client sending "unborn" (as described in + protocol-v2.txt) and will advertise support for this feature during the + protocol v2 capability advertisement. "allow" is the same as + "advertise" except that the server will not advertise support for this + feature; this is useful for load-balanced servers that cannot be + updated atomically (for example), since the administrator could + configure "allow", then after a delay, configure "advertise". diff --git a/Documentation/config/maintenance.txt b/Documentation/config/maintenance.txt index a5ead09e4b..18f0562131 100644 --- a/Documentation/config/maintenance.txt +++ b/Documentation/config/maintenance.txt @@ -15,8 +15,9 @@ maintenance.strategy:: * `none`: This default setting implies no task are run at any schedule. * `incremental`: This setting optimizes for performing small maintenance activities that do not delete any data. This does not schedule the `gc` - task, but runs the `prefetch` and `commit-graph` tasks hourly and the - `loose-objects` and `incremental-repack` tasks daily. + task, but runs the `prefetch` and `commit-graph` tasks hourly, the + `loose-objects` and `incremental-repack` tasks daily, and the `pack-refs` + task weekly. maintenance.<task>.enabled:: This boolean config option controls whether the maintenance task diff --git a/Documentation/config/merge.txt b/Documentation/config/merge.txt index cb2ed58907..e27cc63944 100644 --- a/Documentation/config/merge.txt +++ b/Documentation/config/merge.txt @@ -14,7 +14,7 @@ merge.defaultToUpstream:: branches at the remote named by `branch.<current branch>.remote` are consulted, and then they are mapped via `remote.<remote>.fetch` to their corresponding remote-tracking branches, and the tips of - these tracking branches are merged. + these tracking branches are merged. Defaults to true. merge.ff:: By default, Git does not create an extra merge commit when merging @@ -33,10 +33,12 @@ merge.verifySignatures:: include::fmt-merge-msg.txt[] merge.renameLimit:: - The number of files to consider when performing rename detection - during a merge; if not specified, defaults to the value of - diff.renameLimit. This setting has no effect if rename detection - is turned off. + The number of files to consider in the exhaustive portion of + rename detection during a merge. If not specified, defaults + to the value of diff.renameLimit. If neither + merge.renameLimit nor diff.renameLimit are specified, + currently defaults to 7000. This setting has no effect if + rename detection is turned off. merge.renames:: Whether Git detects renames. If set to "false", rename detection diff --git a/Documentation/config/mergetool.txt b/Documentation/config/mergetool.txt index 16a27443a3..cafbbef46a 100644 --- a/Documentation/config/mergetool.txt +++ b/Documentation/config/mergetool.txt @@ -13,6 +13,11 @@ mergetool.<tool>.cmd:: merged; 'MERGED' contains the name of the file to which the merge tool should write the results of a successful merge. +mergetool.<tool>.hideResolved:: + Allows the user to override the global `mergetool.hideResolved` value + for a specific tool. See `mergetool.hideResolved` for the full + description. + mergetool.<tool>.trustExitCode:: For a custom merge command, specify whether the exit code of the merge command can be used to determine whether the merge was @@ -40,6 +45,16 @@ mergetool.meld.useAutoMerge:: value of `false` avoids using `--auto-merge` altogether, and is the default value. +mergetool.hideResolved:: + During a merge Git will automatically resolve as many conflicts as + possible and write the 'MERGED' file containing conflict markers around + any conflicts that it cannot resolve; 'LOCAL' and 'REMOTE' normally + represent the versions of the file from before Git's conflict + resolution. This flag causes 'LOCAL' and 'REMOTE' to be overwriten so + that only the unresolved conflicts are presented to the merge tool. Can + be configured per-tool via the `mergetool.<tool>.hideResolved` + configuration variable. Defaults to `false`. + mergetool.keepBackup:: After performing a merge, the original file with conflict markers can be saved as a file with a `.orig` extension. If this variable diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index 837f1b1679..763f7af7c4 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -99,12 +99,23 @@ pack.packSizeLimit:: packing to a file when repacking, i.e. the git:// protocol is unaffected. It can be overridden by the `--max-pack-size` option of linkgit:git-repack[1]. Reaching this limit results - in the creation of multiple packfiles; which in turn prevents - bitmaps from being created. - The minimum size allowed is limited to 1 MiB. - The default is unlimited. - Common unit suffixes of 'k', 'm', or 'g' are - supported. + in the creation of multiple packfiles. ++ +Note that this option is rarely useful, and may result in a larger total +on-disk size (because Git will not store deltas between packs), as well +as worse runtime performance (object lookup within multiple packs is +slower than a single pack, and optimizations like reachability bitmaps +cannot cope with multiple packs). ++ +If you need to actively run Git using smaller packfiles (e.g., because your +filesystem does not support large files), this option may help. But if +your goal is to transmit a packfile over a medium that supports limited +sizes (e.g., removable media that cannot store the whole repository), +you are likely better off creating a single large packfile and splitting +it using a generic multi-volume archive tool (e.g., Unix `split`). ++ +The minimum size allowed is limited to 1 MiB. The default is unlimited. +Common unit suffixes of 'k', 'm', or 'g' are supported. pack.useBitmaps:: When true, git will use pack bitmaps (if available) when packing @@ -122,6 +133,21 @@ pack.useSparse:: commits contain certain types of direct renames. Default is `true`. +pack.preferBitmapTips:: + When selecting which commits will receive bitmaps, prefer a + commit at the tip of any reference that is a suffix of any value + of this configuration over any other commits in the "selection + window". ++ +Note that setting this configuration to `refs/foo` does not mean that +the commits at the tips of `refs/foo/bar` and `refs/foo/baz` will +necessarily be selected. This is because commits are selected for +bitmaps from within a series of windows of variable length. ++ +If a commit at the tip of any reference which is a suffix of any value +of this configuration is seen in a window, it is immediately given +preference over any other commit in that window. + pack.writeBitmaps (deprecated):: This is a deprecated synonym for `repack.writeBitmaps`. @@ -133,3 +159,10 @@ pack.writeBitmapHashCache:: between an older, bitmapped pack and objects that have been pushed since the last gc). The downside is that it consumes 4 bytes per object of disk space. Defaults to true. + +pack.writeReverseIndex:: + When true, git will write a corresponding .rev file (see: + link:../technical/pack-format.html[Documentation/technical/pack-format.txt]) + for each new packfile that it writes in all places except for + linkgit:git-fast-import[1] and in the bulk checkin mechanism. + Defaults to false. diff --git a/Documentation/config/push.txt b/Documentation/config/push.txt index 21b256e0a4..632033638c 100644 --- a/Documentation/config/push.txt +++ b/Documentation/config/push.txt @@ -24,15 +24,14 @@ push.default:: * `tracking` - This is a deprecated synonym for `upstream`. -* `simple` - in centralized workflow, work like `upstream` with an - added safety to refuse to push if the upstream branch's name is - different from the local one. +* `simple` - pushes the current branch with the same name on the remote. + -When pushing to a remote that is different from the remote you normally -pull from, work as `current`. This is the safest option and is suited -for beginners. +If you are working on a centralized workflow (pushing to the same repository you +pull from, which is typically `origin`), then you need to configure an upstream +branch with the same name. + -This mode has become the default in Git 2.0. +This mode is the default since Git 2.0, and is the safest option suited for +beginners. * `matching` - push all branches having the same name on both ends. This makes the repository you are pushing to remember the set of @@ -120,3 +119,10 @@ push.useForceIfIncludes:: `--force-if-includes` as an option to linkgit:git-push[1] in the command line. Adding `--no-force-if-includes` at the time of push overrides this configuration setting. + +push.negotiate:: + If set to "true", attempt to reduce the size of the packfile + sent by rounds of negotiation in which the client and the + server attempt to find commits in common. If "false", Git will + rely solely on the server's ref advertisement to find commits + in common. diff --git a/Documentation/config/rebase.txt b/Documentation/config/rebase.txt index 7f7a07d22f..8c979cb20f 100644 --- a/Documentation/config/rebase.txt +++ b/Documentation/config/rebase.txt @@ -1,10 +1,3 @@ -rebase.useBuiltin:: - Unused configuration variable. Used in Git versions 2.20 and - 2.21 as an escape hatch to enable the legacy shellscript - implementation of rebase. Now the built-in rewrite of it in C - is always used. Setting this will emit a warning, to alert any - remaining users that setting this now does nothing. - rebase.backend:: Default backend to use for rebasing. Possible choices are 'apply' or 'merge'. In the future, if the merge backend gains @@ -68,3 +61,6 @@ rebase.rescheduleFailedExec:: Automatically reschedule `exec` commands that failed. This only makes sense in interactive mode (or when an `--exec` option was provided). This is the same as specifying the `--reschedule-failed-exec` option. + +rebase.forkPoint:: + If set to false set `--no-fork-point` option by default. diff --git a/Documentation/config/sendemail.txt b/Documentation/config/sendemail.txt index cbc5af42fd..50baa5d6bf 100644 --- a/Documentation/config/sendemail.txt +++ b/Documentation/config/sendemail.txt @@ -8,9 +8,6 @@ sendemail.smtpEncryption:: See linkgit:git-send-email[1] for description. Note that this setting is not subject to the 'identity' mechanism. -sendemail.smtpssl (deprecated):: - Deprecated alias for 'sendemail.smtpEncryption = ssl'. - sendemail.smtpsslcertpath:: Path to ca-certificates (either a directory or a single file). Set it to an empty string to disable certificate verification. diff --git a/Documentation/config/stash.txt b/Documentation/config/stash.txt index 00eb35434e..9ed775281f 100644 --- a/Documentation/config/stash.txt +++ b/Documentation/config/stash.txt @@ -5,6 +5,11 @@ stash.useBuiltin:: is always used. Setting this will emit a warning, to alert any remaining users that setting this now does nothing. +stash.showIncludeUntracked:: + If this is set to true, the `git stash show` command will show + the untracked files of a stash entry. Defaults to false. See + description of 'show' command in linkgit:git-stash[1]. + stash.showPatch:: If this is set to true, the `git stash show` command without an option will show the stash entry in patch form. Defaults to false. diff --git a/Documentation/config/submodule.txt b/Documentation/config/submodule.txt index d7a63c8c12..ee454f8126 100644 --- a/Documentation/config/submodule.txt +++ b/Documentation/config/submodule.txt @@ -58,8 +58,9 @@ submodule.active:: commands. See linkgit:gitsubmodules[7] for details. submodule.recurse:: - Specifies if commands recurse into submodules by default. This - applies to all commands that have a `--recurse-submodules` option + A boolean indicating if commands should enable the `--recurse-submodules` + option by default. + Applies to all commands that support this option (`checkout`, `fetch`, `grep`, `pull`, `push`, `read-tree`, `reset`, `restore` and `switch`) except `clone` and `ls-files`. Defaults to false. diff --git a/Documentation/config/transfer.txt b/Documentation/config/transfer.txt index 505126a780..b49429eb4d 100644 --- a/Documentation/config/transfer.txt +++ b/Documentation/config/transfer.txt @@ -52,13 +52,17 @@ If you have multiple hideRefs values, later entries override earlier ones (and entries in more-specific config files override less-specific ones). + If a namespace is in use, the namespace prefix is stripped from each -reference before it is matched against `transfer.hiderefs` patterns. +reference before it is matched against `transfer.hiderefs` patterns. In +order to match refs before stripping, add a `^` in front of the ref name. If +you combine `!` and `^`, `!` must be specified first. ++ For example, if `refs/heads/master` is specified in `transfer.hideRefs` and the current namespace is `foo`, then `refs/namespaces/foo/refs/heads/master` -is omitted from the advertisements but `refs/heads/master` and -`refs/namespaces/bar/refs/heads/master` are still advertised as so-called -"have" lines. In order to match refs before stripping, add a `^` in front of -the ref name. If you combine `!` and `^`, `!` must be specified first. +is omitted from the advertisements. If `uploadpack.allowRefInWant` is set, +`upload-pack` will treat `want-ref refs/heads/master` in a protocol v2 +`fetch` command as if `refs/namespaces/foo/refs/heads/master` did not exist. +`receive-pack`, on the other hand, will still advertise the object id the +ref is pointing to without mentioning its name (a so-called ".have" line). + Even if you hide refs, a client may still be able to steal the target objects via the techniques described in the "SECURITY" section of the diff --git a/Documentation/config/uploadpack.txt b/Documentation/config/uploadpack.txt index b0d761282c..32fad5bbe8 100644 --- a/Documentation/config/uploadpack.txt +++ b/Documentation/config/uploadpack.txt @@ -59,15 +59,16 @@ uploadpack.allowFilter:: uploadpackfilter.allow:: Provides a default value for unspecified object filters (see: the - below configuration variable). + below configuration variable). If set to `true`, this will also + enable all filters which get added in the future. Defaults to `true`. uploadpackfilter.<filter>.allow:: Explicitly allow or ban the object filter corresponding to `<filter>`, where `<filter>` may be one of: `blob:none`, - `blob:limit`, `tree`, `sparse:oid`, or `combine`. If using - combined filters, both `combine` and all of the nested filter - kinds must be allowed. Defaults to `uploadpackfilter.allow`. + `blob:limit`, `object:type`, `tree`, `sparse:oid`, or `combine`. + If using combined filters, both `combine` and all of the nested + filter kinds must be allowed. Defaults to `uploadpackfilter.allow`. uploadpackfilter.tree.maxDepth:: Only allow `--filter=tree:<n>` when `<n>` is no more than the value of diff --git a/Documentation/date-formats.txt b/Documentation/date-formats.txt index f1097fac69..99c455f51c 100644 --- a/Documentation/date-formats.txt +++ b/Documentation/date-formats.txt @@ -1,10 +1,7 @@ DATE FORMATS ------------ -The `GIT_AUTHOR_DATE`, `GIT_COMMITTER_DATE` environment variables -ifdef::git-commit[] -and the `--date` option -endif::git-commit[] +The `GIT_AUTHOR_DATE` and `GIT_COMMITTER_DATE` environment variables support the following date formats: Git internal format:: @@ -26,3 +23,9 @@ ISO 8601:: + NOTE: In addition, the date part is accepted in the following formats: `YYYY.MM.DD`, `MM/DD/YYYY` and `DD.MM.YYYY`. + +ifdef::git-commit[] +In addition to recognizing all date formats above, the `--date` option +will also try to make sense of other, more human-centric date formats, +such as relative dates like "yesterday" or "last Friday at noon". +endif::git-commit[] diff --git a/Documentation/diff-generate-patch.txt b/Documentation/diff-generate-patch.txt index b10ff4caa6..c78063d4f7 100644 --- a/Documentation/diff-generate-patch.txt +++ b/Documentation/diff-generate-patch.txt @@ -11,7 +11,7 @@ linkgit:git-diff-files[1] with the `-p` option produces patch text. You can customize the creation of patch text via the `GIT_EXTERNAL_DIFF` and the `GIT_DIFF_OPTS` environment variables -(see linkgit:git[1]). +(see linkgit:git[1]), and the `diff` attribute (see linkgit:gitattributes[5]). What the -p option produces is slightly different from the traditional diff format: @@ -74,6 +74,11 @@ separate lines indicate the old and the new mode. rename from b rename to a +5. Hunk headers mention the name of the function to which the hunk + applies. See "Defining a custom hunk-header" in + linkgit:gitattributes[5] for details of how to tailor to this to + specific languages. + Combined diff format -------------------- @@ -81,9 +86,9 @@ Combined diff format Any diff-generating command can take the `-c` or `--cc` option to produce a 'combined diff' when showing a merge. This is the default format when showing merges with linkgit:git-diff[1] or -linkgit:git-show[1]. Note also that you can give the `-m` option to any -of these commands to force generation of diffs with individual parents -of a merge. +linkgit:git-show[1]. Note also that you can give suitable +`--diff-merges` option to any of these commands to force generation of +diffs in specific format. A "combined diff" format looks like this: diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 746b144c76..c89d530d3d 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -33,6 +33,64 @@ endif::git-diff[] show the patch by default, or to cancel the effect of `--patch`. endif::git-format-patch[] +ifdef::git-log[] +--diff-merges=(off|none|on|first-parent|1|separate|m|combined|c|dense-combined|cc):: +--no-diff-merges:: + Specify diff format to be used for merge commits. Default is + {diff-merges-default} unless `--first-parent` is in use, in which case + `first-parent` is the default. ++ +--diff-merges=(off|none)::: +--no-diff-merges::: + Disable output of diffs for merge commits. Useful to override + implied value. ++ +--diff-merges=on::: +--diff-merges=m::: +-m::: + This option makes diff output for merge commits to be shown in + the default format. `-m` will produce the output only if `-p` + is given as well. The default format could be changed using + `log.diffMerges` configuration parameter, which default value + is `separate`. ++ +--diff-merges=first-parent::: +--diff-merges=1::: + This option makes merge commits show the full diff with + respect to the first parent only. ++ +--diff-merges=separate::: + This makes merge commits show the full diff with respect to + each of the parents. Separate log entry and diff is generated + for each parent. ++ +--diff-merges=combined::: +--diff-merges=c::: +-c::: + With this option, diff output for a merge commit shows the + differences from each of the parents to the merge result + simultaneously instead of showing pairwise diff between a + parent and the result one at a time. Furthermore, it lists + only files which were modified from all parents. `-c` implies + `-p`. ++ +--diff-merges=dense-combined::: +--diff-merges=cc::: +--cc::: + With this option the output produced by + `--diff-merges=combined` is further compressed by omitting + uninteresting hunks whose contents in the parents have only + two variants and the merge result picks one of them without + modification. `--cc` implies `-p`. + +--combined-all-paths:: + This flag causes combined diffs (used for merge commits) to + list the name of the file from all parents. It thus only has + effect when `--diff-merges=[dense-]combined` is in use, and + is likely only useful if filename changes are detected (i.e. + when either rename or copy detection have been requested). +endif::git-log[] + -U<n>:: --unified=<n>:: Generate diffs with <n> lines of context instead of @@ -242,11 +300,14 @@ explained for the configuration variable `core.quotePath` (see linkgit:git-config[1]). --name-only:: - Show only names of changed files. + Show only names of changed files. The file names are often encoded in UTF-8. + For more information see the discussion about encoding in the linkgit:git-log[1] + manual page. --name-status:: Show only names and status of changed files. See the description of the `--diff-filter` option on what the status letters mean. + Just like `--name-only` the file names are often encoded in UTF-8. --submodule[=<format>]:: Specify how differences in submodules are shown. When specifying @@ -527,11 +588,17 @@ When used together with `-B`, omit also the preimage in the deletion part of a delete/create pair. -l<num>:: - The `-M` and `-C` options require O(n^2) processing time where n - is the number of potential rename/copy targets. This - option prevents rename/copy detection from running if - the number of rename/copy targets exceeds the specified - number. + The `-M` and `-C` options involve some preliminary steps that + can detect subsets of renames/copies cheaply, followed by an + exhaustive fallback portion that compares all remaining + unpaired destinations to all relevant sources. (For renames, + only remaining unpaired sources are relevant; for copies, all + original sources are relevant.) For N sources and + destinations, this exhaustive check is O(N^2). This option + prevents the exhaustive portion of rename/copy detection from + running if the number of source/destination files involved + exceeds the specified number. Defaults to diff.renameLimit. + Note that a value of 0 is treated as unlimited. ifndef::git-format-patch[] --diff-filter=[(A|C|D|M|R|T|U|X|B)...[*]]:: @@ -649,6 +716,14 @@ matches a pattern if removing any number of the final pathname components matches the pattern. For example, the pattern "`foo*bar`" matches "`fooasdfbar`" and "`foo/bar/baz/asdf`" but not "`foobarx`". +--skip-to=<file>:: +--rotate-to=<file>:: + Discard the files before the named <file> from the output + (i.e. 'skip to'), or move them to the end of the output + (i.e. 'rotate to'). These were invented primarily for use + of the `git difftool` command, and may not be very useful + otherwise. + ifndef::git-format-patch[] -R:: Swap two inputs; that is, show differences from index or diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt index 2bf77b46fd..e967ff1874 100644 --- a/Documentation/fetch-options.txt +++ b/Documentation/fetch-options.txt @@ -7,6 +7,10 @@ existing contents of `.git/FETCH_HEAD`. Without this option old data in `.git/FETCH_HEAD` will be overwritten. +--atomic:: + Use an atomic transaction to update local refs. Either all refs are + updated, or on error, no refs are updated. + --depth=<depth>:: Limit fetching to the specified number of commits from the tip of each remote branch history. If fetching to a 'shallow' repository @@ -58,8 +62,17 @@ The argument to this option may be a glob on ref names, a ref, or the (possibly abbreviated) SHA-1 of a commit. Specifying a glob is equivalent to specifying this option multiple times, one for each matching ref name. + -See also the `fetch.negotiationAlgorithm` configuration variable -documented in linkgit:git-config[1]. +See also the `fetch.negotiationAlgorithm` and `push.negotiate` +configuration variables documented in linkgit:git-config[1], and the +`--negotiate-only` option below. + +--negotiate-only:: + Do not fetch anything from the server, and instead print the + ancestors of the provided `--negotiation-tip=*` arguments, + which we have in common with the server. ++ +Internally this is used to implement the `push.negotiate` option, see +linkgit:git-config[1]. --dry-run:: Show what would be done, without making any changes. @@ -106,6 +119,11 @@ ifndef::git-pull[] setting `fetch.writeCommitGraph`. endif::git-pull[] +--prefetch:: + Modify the configured refspec to place all refs into the + `refs/prefetch/` namespace. See the `prefetch` task in + linkgit:git-maintenance[1]. + -p:: --prune:: Before fetching, remove any remote-tracking references that no diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt index 06bc063542..0a4a984dfd 100644 --- a/Documentation/git-am.txt +++ b/Documentation/git-am.txt @@ -15,6 +15,7 @@ SYNOPSIS [--whitespace=<option>] [-C<n>] [-p<n>] [--directory=<dir>] [--exclude=<path>] [--include=<path>] [--reject] [-q | --quiet] [--[no-]scissors] [-S[<keyid>]] [--patch-format=<format>] + [--quoted-cr=<action>] [(<mbox> | <Maildir>)...] 'git am' (--continue | --skip | --abort | --quit | --show-current-patch[=(diff|raw)]) @@ -59,6 +60,9 @@ OPTIONS --no-scissors:: Ignore scissors lines (see linkgit:git-mailinfo[1]). +--quoted-cr=<action>:: + This flag will be passed down to 'git mailinfo' (see linkgit:git-mailinfo[1]). + -m:: --message-id:: Pass the `-m` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]), @@ -79,7 +83,7 @@ OPTIONS Pass `-u` flag to 'git mailinfo' (see linkgit:git-mailinfo[1]). The proposed commit log message taken from the e-mail is re-coded into UTF-8 encoding (configuration variable - `i18n.commitencoding` can be used to specify project's + `i18n.commitEncoding` can be used to specify project's preferred encoding if it is not UTF-8). + This was optional in prior versions of git, but now it is the @@ -174,6 +178,8 @@ default. You can use `--no-utf8` to override this. --abort:: Restore the original branch and abort the patching operation. + Revert contents of files involved in the am operation to their + pre-am state. --quit:: Abort the patching operation but keep HEAD and the index diff --git a/Documentation/git-apply.txt b/Documentation/git-apply.txt index 91d9a8601c..aa1ae56a25 100644 --- a/Documentation/git-apply.txt +++ b/Documentation/git-apply.txt @@ -84,12 +84,13 @@ OPTIONS -3:: --3way:: - When the patch does not apply cleanly, fall back on 3-way merge if - the patch records the identity of blobs it is supposed to apply to, - and we have those blobs available locally, possibly leaving the + Attempt 3-way merge if the patch records the identity of blobs it is supposed + to apply to and we have those blobs available locally, possibly leaving the conflict markers in the files in the working tree for the user to - resolve. This option implies the `--index` option, and is incompatible - with the `--reject` and the `--cached` options. + resolve. This option implies the `--index` option unless the + `--cached` option is used, and is incompatible with the `--reject` option. + When used with the `--cached` option, any conflicts are left at higher stages + in the cache. --build-fake-ancestor=<file>:: Newer 'git diff' output has embedded 'index information' diff --git a/Documentation/git-blame.txt b/Documentation/git-blame.txt index 34b496d485..3bf5d5d8b4 100644 --- a/Documentation/git-blame.txt +++ b/Documentation/git-blame.txt @@ -226,7 +226,7 @@ commit commentary), a blame viewer will not care. MAPPING AUTHORS --------------- -include::mailmap.txt[] +See linkgit:gitmailmap[5]. SEE ALSO diff --git a/Documentation/git-branch.txt b/Documentation/git-branch.txt index adaa1782a8..5449767121 100644 --- a/Documentation/git-branch.txt +++ b/Documentation/git-branch.txt @@ -78,8 +78,8 @@ renaming. If <newbranch> exists, -M must be used to force the rename to happen. The `-c` and `-C` options have the exact same semantics as `-m` and -`-M`, except instead of the branch being renamed it along with its -config and reflog will be copied to a new name. +`-M`, except instead of the branch being renamed, it will be copied to a +new name, along with its config and reflog. With a `-d` or `-D` option, `<branchname>` will be deleted. You may specify more than one branch for deletion. If the branch currently @@ -118,7 +118,8 @@ OPTIONS Reset <branchname> to <startpoint>, even if <branchname> exists already. Without `-f`, 'git branch' refuses to change an existing branch. In combination with `-d` (or `--delete`), allow deleting the - branch irrespective of its merged status. In combination with + branch irrespective of its merged status, or whether it even + points to a valid commit. In combination with `-m` (or `--move`), allow renaming the branch even if the new branch name already exists, the same applies for `-c` (or `--copy`). @@ -153,7 +154,7 @@ OPTIONS --column[=<options>]:: --no-column:: Display branch listing in columns. See configuration variable - column.branch for option syntax.`--column` and `--no-column` + `column.branch` for option syntax. `--column` and `--no-column` without options are equivalent to 'always' and 'never' respectively. + This option is only applicable in non-verbose mode. diff --git a/Documentation/git-bugreport.txt b/Documentation/git-bugreport.txt index 66e88c2e31..d8817bf3ce 100644 --- a/Documentation/git-bugreport.txt +++ b/Documentation/git-bugreport.txt @@ -40,8 +40,8 @@ OPTIONS ------- -o <path>:: --output-directory <path>:: - Place the resulting bug report file in `<path>` instead of the root of - the Git repository. + Place the resulting bug report file in `<path>` instead of the current + directory. -s <format>:: --suffix <format>:: diff --git a/Documentation/git-bundle.txt b/Documentation/git-bundle.txt index 53804cad4b..71b5ecabd1 100644 --- a/Documentation/git-bundle.txt +++ b/Documentation/git-bundle.txt @@ -13,26 +13,53 @@ SYNOPSIS [--version=<version>] <file> <git-rev-list-args> 'git bundle' verify [-q | --quiet] <file> 'git bundle' list-heads <file> [<refname>...] -'git bundle' unbundle <file> [<refname>...] +'git bundle' unbundle [--progress] <file> [<refname>...] DESCRIPTION ----------- -Some workflows require that one or more branches of development on one -machine be replicated on another machine, but the two machines cannot -be directly connected, and therefore the interactive Git protocols (git, -ssh, http) cannot be used. +Create, unpack, and manipulate "bundle" files. Bundles are used for +the "offline" transfer of Git objects without an active "server" +sitting on the other side of the network connection. -The 'git bundle' command packages objects and references in an archive -at the originating machine, which can then be imported into another -repository using 'git fetch', 'git pull', or 'git clone', -after moving the archive by some means (e.g., by sneakernet). +They can be used to create both incremental and full backups of a +repository, and to relay the state of the references in one repository +to another. -As no -direct connection between the repositories exists, the user must specify a -basis for the bundle that is held by the destination repository: the -bundle assumes that all objects in the basis are already in the -destination repository. +Git commands that fetch or otherwise "read" via protocols such as +`ssh://` and `https://` can also operate on bundle files. It is +possible linkgit:git-clone[1] a new repository from a bundle, to use +linkgit:git-fetch[1] to fetch from one, and to list the references +contained within it with linkgit:git-ls-remote[1]. There's no +corresponding "write" support, i.e.a 'git push' into a bundle is not +supported. + +See the "EXAMPLES" section below for examples of how to use bundles. + +BUNDLE FORMAT +------------- + +Bundles are `.pack` files (see linkgit:git-pack-objects[1]) with a +header indicating what references are contained within the bundle. + +Like the the packed archive format itself bundles can either be +self-contained, or be created using exclusions. +See the "OBJECT PREREQUISITES" section below. + +Bundles created using revision exclusions are "thin packs" created +using the `--thin` option to linkgit:git-pack-objects[1], and +unbundled using the `--fix-thin` option to linkgit:git-index-pack[1]. + +There is no option to create a "thick pack" when using revision +exclusions, users should not be concerned about the difference. By +using "thin packs" bundles created using exclusions are smaller in +size. That they're "thin" under the hood is merely noted here as a +curiosity, and as a reference to other documentation + +See link:technical/bundle-format.html[the `bundle-format` +documentation] for more details and the discussion of "thin pack" in +link:technical/pack-format.html[the pack format documentation] for +further details. OPTIONS ------- @@ -117,28 +144,88 @@ unbundle <file>:: SPECIFYING REFERENCES --------------------- -'git bundle' will only package references that are shown by -'git show-ref': this includes heads, tags, and remote heads. References -such as `master~1` cannot be packaged, but are perfectly suitable for -defining the basis. More than one reference may be packaged, and more -than one basis can be specified. The objects packaged are those not -contained in the union of the given bases. Each basis can be -specified explicitly (e.g. `^master~10`), or implicitly (e.g. -`master~10..master`, `--since=10.days.ago master`). +Revisions must accompanied by reference names to be packaged in a +bundle. + +More than one reference may be packaged, and more than one set of prerequisite objects can +be specified. The objects packaged are those not contained in the +union of the prerequisites. + +The 'git bundle create' command resolves the reference names for you +using the same rules as `git rev-parse --abbrev-ref=loose`. Each +prerequisite can be specified explicitly (e.g. `^master~10`), or implicitly +(e.g. `master~10..master`, `--since=10.days.ago master`). + +All of these simple cases are OK (assuming we have a "master" and +"next" branch): + +---------------- +$ git bundle create master.bundle master +$ echo master | git bundle create master.bundle --stdin +$ git bundle create master-and-next.bundle master next +$ (echo master; echo next) | git bundle create master-and-next.bundle --stdin +---------------- + +And so are these (and the same but omitted `--stdin` examples): + +---------------- +$ git bundle create recent-master.bundle master~10..master +$ git bundle create recent-updates.bundle master~10..master next~5..next +---------------- + +A revision name or a range whose right-hand-side cannot be resolved to +a reference is not accepted: + +---------------- +$ git bundle create HEAD.bundle $(git rev-parse HEAD) +fatal: Refusing to create empty bundle. +$ git bundle create master-yesterday.bundle master~10..master~5 +fatal: Refusing to create empty bundle. +---------------- + +OBJECT PREREQUISITES +-------------------- + +When creating bundles it is possible to create a self-contained bundle +that can be unbundled in a repository with no common history, as well +as providing negative revisions to exclude objects needed in the +earlier parts of the history. + +Feeding a revision such as `new` to `git bundle create` will create a +bundle file that contains all the objects reachable from the revision +`new`. That bundle can be unbundled in any repository to obtain a full +history that leads to the revision `new`: + +---------------- +$ git bundle create full.bundle new +---------------- + +A revision range such as `old..new` will produce a bundle file that +will require the revision `old` (and any objects reachable from it) +to exist for the bundle to be "unbundle"-able: + +---------------- +$ git bundle create full.bundle old..new +---------------- + +A self-contained bundle without any prerequisites can be extracted +into anywhere, even into an empty repository, or be cloned from +(i.e., `new`, but not `old..new`). -It is very important that the basis used be held by the destination. It is okay to err on the side of caution, causing the bundle file to contain objects already in the destination, as these are ignored when unpacking at the destination. -`git clone` can use any bundle created without negative refspecs -(e.g., `new`, but not `old..new`). If you want to match `git clone --mirror`, which would include your refs such as `refs/remotes/*`, use `--all`. If you want to provide the same set of refs that a clone directly from the source repository would get, use `--branches --tags` for the `<git-rev-list-args>`. +The 'git bundle verify' command can be used to check whether your +recipient repository has the required prerequisite commits for a +bundle. + EXAMPLES -------- @@ -149,7 +236,7 @@ but we can move data from A to B via some mechanism (CD, email, etc.). We want to update R2 with development made on the branch master in R1. To bootstrap the process, you can first create a bundle that does not have -any basis. You can use a tag to remember up to what commit you last +any prerequisites. You can use a tag to remember up to what commit you last processed, in order to make it easy to later update the other repository with an incremental bundle: @@ -200,7 +287,7 @@ machineB$ git pull If you know up to what commit the intended recipient repository should have the necessary objects, you can use that knowledge to specify the -basis, giving a cut-off point to limit the revisions and objects that go +prerequisites, giving a cut-off point to limit the revisions and objects that go in the resulting bundle. The previous example used the lastR2bundle tag for this purpose, but you can use any other options that you would give to the linkgit:git-log[1] command. Here are more examples: @@ -211,7 +298,7 @@ You can use a tag that is present in both: $ git bundle create mybundle v1.0.0..master ---------------- -You can use a basis based on time: +You can use a prerequisite based on time: ---------------- $ git bundle create mybundle --since=10.days master @@ -224,7 +311,7 @@ $ git bundle create mybundle -10 master ---------------- You can run `git-bundle verify` to see if you can extract from a bundle -that was created with a basis: +that was created with a prerequisite: ---------------- $ git bundle verify mybundle diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt index 8e192d87db..4eb0421b3f 100644 --- a/Documentation/git-cat-file.txt +++ b/Documentation/git-cat-file.txt @@ -35,42 +35,42 @@ OPTIONS -t:: Instead of the content, show the object type identified by - <object>. + `<object>`. -s:: Instead of the content, show the object size identified by - <object>. + `<object>`. -e:: - Exit with zero status if <object> exists and is a valid - object. If <object> is of an invalid format exit with non-zero and + Exit with zero status if `<object>` exists and is a valid + object. If `<object>` is of an invalid format exit with non-zero and emits an error on stderr. -p:: - Pretty-print the contents of <object> based on its type. + Pretty-print the contents of `<object>` based on its type. <type>:: - Typically this matches the real type of <object> but asking + Typically this matches the real type of `<object>` but asking for a type that can trivially be dereferenced from the given - <object> is also permitted. An example is to ask for a - "tree" with <object> being a commit object that contains it, - or to ask for a "blob" with <object> being a tag object that + `<object>` is also permitted. An example is to ask for a + "tree" with `<object>` being a commit object that contains it, + or to ask for a "blob" with `<object>` being a tag object that points at it. --textconv:: Show the content as transformed by a textconv filter. In this case, - <object> has to be of the form <tree-ish>:<path>, or :<path> in + `<object>` has to be of the form `<tree-ish>:<path>`, or `:<path>` in order to apply the filter to the content recorded in the index at - <path>. + `<path>`. --filters:: Show the content as converted by the filters configured in - the current working tree for the given <path> (i.e. smudge filters, - end-of-line conversion, etc). In this case, <object> has to be of - the form <tree-ish>:<path>, or :<path>. + the current working tree for the given `<path>` (i.e. smudge filters, + end-of-line conversion, etc). In this case, `<object>` has to be of + the form `<tree-ish>:<path>`, or `:<path>`. --path=<path>:: - For use with --textconv or --filters, to allow specifying an object + For use with `--textconv` or `--filters`, to allow specifying an object name and a path separately, e.g. when it is difficult to figure out the revision from which the blob came. @@ -115,15 +115,15 @@ OPTIONS repository. --allow-unknown-type:: - Allow -s or -t to query broken/corrupt objects of unknown type. + Allow `-s` or `-t` to query broken/corrupt objects of unknown type. --follow-symlinks:: - With --batch or --batch-check, follow symlinks inside the + With `--batch` or `--batch-check`, follow symlinks inside the repository when requesting objects with extended SHA-1 expressions of the form tree-ish:path-in-tree. Instead of providing output about the link itself, provide output about the linked-to object. If a symlink points outside the - tree-ish (e.g. a link to /foo or a root-level link to ../foo), + tree-ish (e.g. a link to `/foo` or a root-level link to `../foo`), the portion of the link which is outside the tree will be printed. + @@ -175,15 +175,15 @@ respectively print: OUTPUT ------ -If `-t` is specified, one of the <type>. +If `-t` is specified, one of the `<type>`. -If `-s` is specified, the size of the <object> in bytes. +If `-s` is specified, the size of the `<object>` in bytes. -If `-e` is specified, no output, unless the <object> is malformed. +If `-e` is specified, no output, unless the `<object>` is malformed. -If `-p` is specified, the contents of <object> are pretty-printed. +If `-p` is specified, the contents of `<object>` are pretty-printed. -If <type> is specified, the raw (though uncompressed) contents of the <object> +If `<type>` is specified, the raw (though uncompressed) contents of the `<object>` will be returned. BATCH OUTPUT @@ -200,7 +200,7 @@ object, with placeholders of the form `%(atom)` expanded, followed by a newline. The available atoms are: `objectname`:: - The 40-hex object name of the object. + The full hex representation of the object name. `objecttype`:: The type of the object (the same as `cat-file -t` reports). @@ -215,8 +215,9 @@ newline. The available atoms are: `deltabase`:: If the object is stored as a delta on-disk, this expands to the - 40-hex sha1 of the delta base object. Otherwise, expands to the - null sha1 (40 zeroes). See `CAVEATS` below. + full hex representation of the delta base object name. + Otherwise, expands to the null OID (all zeroes). See `CAVEATS` + below. `rest`:: If this atom is used in the output string, input lines are split @@ -235,14 +236,14 @@ newline. For example, `--batch` without a custom format would produce: ------------ -<sha1> SP <type> SP <size> LF +<oid> SP <type> SP <size> LF <contents> LF ------------ Whereas `--batch-check='%(objectname) %(objecttype)'` would produce: ------------ -<sha1> SP <type> LF +<oid> SP <type> LF ------------ If a name is specified on stdin that cannot be resolved to an object in @@ -258,7 +259,7 @@ If a name is specified that might refer to more than one object (an ambiguous sh <object> SP ambiguous LF ------------ -If --follow-symlinks is used, and a symlink in the repository points +If `--follow-symlinks` is used, and a symlink in the repository points outside the repository, then `cat-file` will ignore any custom format and print: @@ -267,11 +268,11 @@ symlink SP <size> LF <symlink> LF ------------ -The symlink will either be absolute (beginning with a /), or relative -to the tree root. For instance, if dir/link points to ../../foo, then -<symlink> will be ../foo. <size> is the size of the symlink in bytes. +The symlink will either be absolute (beginning with a `/`), or relative +to the tree root. For instance, if dir/link points to `../../foo`, then +`<symlink>` will be `../foo`. `<size>` is the size of the symlink in bytes. -If --follow-symlinks is used, the following error messages will be +If `--follow-symlinks` is used, the following error messages will be displayed: ------------ diff --git a/Documentation/git-check-mailmap.txt b/Documentation/git-check-mailmap.txt index aa2055dbeb..02f4418323 100644 --- a/Documentation/git-check-mailmap.txt +++ b/Documentation/git-check-mailmap.txt @@ -36,10 +36,17 @@ name is provided or known to the 'mailmap', ``Name $$<user@host>$$'' is printed; otherwise only ``$$<user@host>$$'' is printed. +CONFIGURATION +------------- + +See `mailmap.file` and `mailmap.blob` in linkgit:git-config[1] for how +to specify a custom `.mailmap` target file or object. + + MAPPING AUTHORS --------------- -include::mailmap.txt[] +See linkgit:gitmailmap[5]. GIT diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt index 02d9c19cec..3fe3810f1c 100644 --- a/Documentation/git-clone.txt +++ b/Documentation/git-clone.txt @@ -15,7 +15,7 @@ SYNOPSIS [--dissociate] [--separate-git-dir <git dir>] [--depth <depth>] [--[no-]single-branch] [--no-tags] [--recurse-submodules[=<pathspec>]] [--[no-]shallow-submodules] - [--[no-]remote-submodules] [--jobs <n>] [--sparse] + [--[no-]remote-submodules] [--jobs <n>] [--sparse] [--[no-]reject-shallow] [--filter=<filter>] [--] <repository> [<directory>] @@ -149,6 +149,11 @@ objects from the source repository into a pack in the cloned repository. --no-checkout:: No checkout of HEAD is performed after the clone is complete. +--[no-]reject-shallow:: + Fail if the source repository is a shallow repository. + The 'clone.rejectShallow' configuration variable can be used to + specify the default. + --bare:: Make a 'bare' Git repository. That is, instead of creating `<directory>` and placing the administrative diff --git a/Documentation/git-column.txt b/Documentation/git-column.txt index f58e9c43e6..6cea9ab463 100644 --- a/Documentation/git-column.txt +++ b/Documentation/git-column.txt @@ -39,7 +39,7 @@ OPTIONS --indent=<string>:: String to be printed at the beginning of each line. ---nl=<N>:: +--nl=<string>:: String to be printed at the end of each line, including newline character. diff --git a/Documentation/git-commit.txt b/Documentation/git-commit.txt index 17150fa7ea..95fec5f069 100644 --- a/Documentation/git-commit.txt +++ b/Documentation/git-commit.txt @@ -9,12 +9,13 @@ SYNOPSIS -------- [verse] 'git commit' [-a | --interactive | --patch] [-s] [-v] [-u<mode>] [--amend] - [--dry-run] [(-c | -C | --fixup | --squash) <commit>] + [--dry-run] [(-c | -C | --squash) <commit> | --fixup [(amend|reword):]<commit>)] [-F <file> | -m <msg>] [--reset-author] [--allow-empty] [--allow-empty-message] [--no-verify] [-e] [--author=<author>] [--date=<date>] [--cleanup=<mode>] [--[no-]status] [-i | -o] [--pathspec-from-file=<file> [--pathspec-file-nul]] - [-S[<keyid>]] [--] [<pathspec>...] + [(--trailer <token>[(=|:)<value>])...] [-S[<keyid>]] + [--] [<pathspec>...] DESCRIPTION ----------- @@ -71,7 +72,7 @@ OPTIONS -p:: --patch:: - Use the interactive patch selection interface to chose + Use the interactive patch selection interface to choose which changes to commit. See linkgit:git-add[1] for details. @@ -86,11 +87,44 @@ OPTIONS Like '-C', but with `-c` the editor is invoked, so that the user can further edit the commit message. ---fixup=<commit>:: - Construct a commit message for use with `rebase --autosquash`. - The commit message will be the subject line from the specified - commit with a prefix of "fixup! ". See linkgit:git-rebase[1] - for details. +--fixup=[(amend|reword):]<commit>:: + Create a new commit which "fixes up" `<commit>` when applied with + `git rebase --autosquash`. Plain `--fixup=<commit>` creates a + "fixup!" commit which changes the content of `<commit>` but leaves + its log message untouched. `--fixup=amend:<commit>` is similar but + creates an "amend!" commit which also replaces the log message of + `<commit>` with the log message of the "amend!" commit. + `--fixup=reword:<commit>` creates an "amend!" commit which + replaces the log message of `<commit>` with its own log message + but makes no changes to the content of `<commit>`. ++ +The commit created by plain `--fixup=<commit>` has a subject +composed of "fixup!" followed by the subject line from <commit>, +and is recognized specially by `git rebase --autosquash`. The `-m` +option may be used to supplement the log message of the created +commit, but the additional commentary will be thrown away once the +"fixup!" commit is squashed into `<commit>` by +`git rebase --autosquash`. ++ +The commit created by `--fixup=amend:<commit>` is similar but its +subject is instead prefixed with "amend!". The log message of +<commit> is copied into the log message of the "amend!" commit and +opened in an editor so it can be refined. When `git rebase +--autosquash` squashes the "amend!" commit into `<commit>`, the +log message of `<commit>` is replaced by the refined log message +from the "amend!" commit. It is an error for the "amend!" commit's +log message to be empty unless `--allow-empty-message` is +specified. ++ +`--fixup=reword:<commit>` is shorthand for `--fixup=amend:<commit> +--only`. It creates an "amend!" commit with only a log message +(ignoring any changes staged in the index). When squashed by `git +rebase --autosquash`, it replaces the log message of `<commit>` +without making any other changes. ++ +Neither "fixup!" nor "amend!" commits change authorship of +`<commit>` when applied by `git rebase --autosquash`. +See linkgit:git-rebase[1] for details. --squash=<commit>:: Construct a commit message for use with `rebase --autosquash`. @@ -166,6 +200,17 @@ The `-m` option is mutually exclusive with `-c`, `-C`, and `-F`. include::signoff-option.txt[] +--trailer <token>[(=|:)<value>]:: + Specify a (<token>, <value>) pair that should be applied as a + trailer. (e.g. `git commit --trailer "Signed-off-by:C O Mitter \ + <committer@example.com>" --trailer "Helped-by:C O Mitter \ + <committer@example.com>"` will add the "Signed-off-by" trailer + and the "Helped-by" trailer to the commit message.) + The `trailer.*` configuration variables + (linkgit:git-interpret-trailers[1]) can be used to define if + a duplicated trailer is omitted, where in the run of trailers + each trailer would appear, and other details. + -n:: --no-verify:: This option bypasses the pre-commit and commit-msg hooks. diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt index 0e9351d3cb..992225f612 100644 --- a/Documentation/git-config.txt +++ b/Documentation/git-config.txt @@ -71,6 +71,10 @@ codes are: On success, the command returns the exit code 0. +A list of all available configuration variables can be obtained using the +`git help --config` command. + +[[OPTIONS]] OPTIONS ------- @@ -143,7 +147,13 @@ See also <<FILES>>. -f config-file:: --file config-file:: - Use the given config file instead of the one specified by GIT_CONFIG. + For writing options: write to the specified file rather than the + repository `.git/config`. ++ +For reading options: read only from the specified file rather than from all +available files. ++ +See also <<FILES>>. --blob blob:: Similar to `--file` but use the given blob instead of a file. E.g. @@ -325,20 +335,18 @@ All writing options will per default write to the repository specific configuration file. Note that this also affects options like `--replace-all` and `--unset`. *'git config' will only ever change one file at a time*. -You can override these rules either by command-line options or by environment -variables. The `--global`, `--system` and `--worktree` options will limit -the file used to the global, system-wide or per-worktree file respectively. -The `GIT_CONFIG` environment variable has a similar effect, but you -can specify any filename you want. +You can override these rules using the `--global`, `--system`, +`--local`, `--worktree`, and `--file` command-line options; see +<<OPTIONS>> above. ENVIRONMENT ----------- -GIT_CONFIG:: - Take the configuration from the given file instead of .git/config. - Using the "--global" option forces this to ~/.gitconfig. Using the - "--system" option forces this to $(prefix)/etc/gitconfig. +GIT_CONFIG_GLOBAL:: +GIT_CONFIG_SYSTEM:: + Take the configuration from the given files instead from global or + system-level configuration. See linkgit:git[1] for details. GIT_CONFIG_NOSYSTEM:: Whether to skip reading settings from the system-wide @@ -346,6 +354,28 @@ GIT_CONFIG_NOSYSTEM:: See also <<FILES>>. +GIT_CONFIG_COUNT:: +GIT_CONFIG_KEY_<n>:: +GIT_CONFIG_VALUE_<n>:: + If GIT_CONFIG_COUNT is set to a positive number, all environment pairs + GIT_CONFIG_KEY_<n> and GIT_CONFIG_VALUE_<n> up to that number will be + added to the process's runtime configuration. The config pairs are + zero-indexed. Any missing key or value is treated as an error. An empty + GIT_CONFIG_COUNT is treated the same as GIT_CONFIG_COUNT=0, namely no + pairs are processed. These environment variables will override values + in configuration files, but will be overridden by any explicit options + passed via `git -c`. ++ +This is useful for cases where you want to spawn multiple git commands +with a common configuration but cannot depend on a configuration file, +for example when writing scripts. + +GIT_CONFIG:: + If no `--file` option is provided to `git config`, use the file + given by `GIT_CONFIG` as if it were provided via `--file`. This + variable has no effect on other Git commands, and is mostly for + historical compatibility; there is generally no reason to use it + instead of the `--file` option. [[EXAMPLES]] EXAMPLES diff --git a/Documentation/git-credential.txt b/Documentation/git-credential.txt index 31c81c4c02..206e3c5f40 100644 --- a/Documentation/git-credential.txt +++ b/Documentation/git-credential.txt @@ -159,3 +159,7 @@ empty string. + Components which are missing from the URL (e.g., there is no username in the example above) will be left unset. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/git-cvsserver.txt b/Documentation/git-cvsserver.txt index 1b1c71ad9d..4dc57ed254 100644 --- a/Documentation/git-cvsserver.txt +++ b/Documentation/git-cvsserver.txt @@ -24,6 +24,18 @@ Usage: [verse] 'git-cvsserver' [<options>] [pserver|server] [<directory> ...] +DESCRIPTION +----------- + +This application is a CVS emulation layer for Git. + +It is highly functional. However, not all methods are implemented, +and for those methods that are implemented, +not all switches are implemented. + +Testing has been done using both the CLI CVS client, and the Eclipse CVS +plugin. Most functionality works fine with both of these clients. + OPTIONS ------- @@ -57,18 +69,6 @@ access still needs to be enabled by the `gitcvs.enabled` config option unless `--export-all` was given, too. -DESCRIPTION ------------ - -This application is a CVS emulation layer for Git. - -It is highly functional. However, not all methods are implemented, -and for those methods that are implemented, -not all switches are implemented. - -Testing has been done using both the CLI CVS client, and the Eclipse CVS -plugin. Most functionality works fine with both of these clients. - LIMITATIONS ----------- @@ -99,7 +99,7 @@ looks like ------ -Only anonymous access is provided by pserve by default. To commit you +Only anonymous access is provided by pserver by default. To commit you will have to create pserver accounts, simply add a gitcvs.authdb setting in the config file of the repositories you want the cvsserver to allow writes to, for example: @@ -114,21 +114,20 @@ The format of these files is username followed by the encrypted password, for example: ------ - myuser:$1Oyx5r9mdGZ2 - myuser:$1$BA)@$vbnMJMDym7tA32AamXrm./ + myuser:sqkNi8zPf01HI + myuser:$1$9K7FzU28$VfF6EoPYCJEYcVQwATgOP/ + myuser:$5$.NqmNH1vwfzGpV8B$znZIcumu1tNLATgV2l6e1/mY8RzhUDHMOaVOeL1cxV3 ------ You can use the 'htpasswd' facility that comes with Apache to make these -files, but Apache's MD5 crypt method differs from the one used by most C -library's crypt() function, so don't use the -m option. +files, but only with the -d option (or -B if your system suports it). -Alternatively you can produce the password with perl's crypt() operator: ------ - perl -e 'my ($user, $pass) = @ARGV; printf "%s:%s\n", $user, crypt($user, $pass)' $USER password ------ +Preferably use the system specific utility that manages password hash +creation in your platform (e.g. mkpasswd in Linux, encrypt in OpenBSD or +pwhash in NetBSD) and paste it in the right location. Then provide your password via the pserver method, for example: ------ - cvs -d:pserver:someuser:somepassword <at> server/path/repo.git co <HEAD_name> + cvs -d:pserver:someuser:somepassword@server:/path/repo.git co <HEAD_name> ------ No special setup is needed for SSH access, other than having Git tools in the PATH. If you have clients that do not accept the CVS_SERVER @@ -138,7 +137,7 @@ Note: Newer CVS versions (>= 1.12.11) also support specifying CVS_SERVER directly in CVSROOT like ------ -cvs -d ":ext;CVS_SERVER=git cvsserver:user@server/path/repo.git" co <HEAD_name> + cvs -d ":ext;CVS_SERVER=git cvsserver:user@server/path/repo.git" co <HEAD_name> ------ This has the advantage that it will be saved in your 'CVS/Root' files and you don't need to worry about always setting the correct environment @@ -186,8 +185,8 @@ allowing access over SSH. + -- ------ - export CVSROOT=:ext:user@server:/var/git/project.git - export CVS_SERVER="git cvsserver" + export CVSROOT=:ext:user@server:/var/git/project.git + export CVS_SERVER="git cvsserver" ------ -- 4. For SSH clients that will make commits, make sure their server-side @@ -203,7 +202,7 @@ allowing access over SSH. `project-master` directory: + ------ - cvs co -d project-master master + cvs co -d project-master master ------ [[dbbackend]] diff --git a/Documentation/git-describe.txt b/Documentation/git-describe.txt index a88f6ae2c6..c6a79c2a0f 100644 --- a/Documentation/git-describe.txt +++ b/Documentation/git-describe.txt @@ -63,9 +63,10 @@ OPTIONS Automatically implies --tags. --abbrev=<n>:: - Instead of using the default 7 hexadecimal digits as the - abbreviated object name, use <n> digits, or as many digits - as needed to form a unique object name. An <n> of 0 + Instead of using the default number of hexadecimal digits (which + will vary according to the number of objects in the repository with + a default of 7) of the abbreviated object name, use <n> digits, or + as many digits as needed to form a unique object name. An <n> of 0 will suppress long format, only showing the closest tag. --candidates=<n>:: @@ -139,8 +140,11 @@ at the end. The number of additional commits is the number of commits which would be displayed by "git log v1.0.4..parent". -The hash suffix is "-g" + unambiguous abbreviation for the tip commit -of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`). +The hash suffix is "-g" + an unambigous abbreviation for the tip commit +of parent (which was `2414721b194453f058079d897d13c4e377f92dc6`). The +length of the abbreviation scales as the repository grows, using the +approximate number of objects in the repository and a bit of math +around the birthday paradox, and defaults to a minimum of 7. The "g" prefix stands for "git" and is used to allow describing the version of a software depending on the SCM the software is managed with. This is useful in an environment where people may use different SCMs. diff --git a/Documentation/git-diff.txt b/Documentation/git-diff.txt index 7f4c8a8ce7..6236c75c9b 100644 --- a/Documentation/git-diff.txt +++ b/Documentation/git-diff.txt @@ -51,16 +51,20 @@ files on disk. --staged is a synonym of --cached. + If --merge-base is given, instead of using <commit>, use the merge base -of <commit> and HEAD. `git diff --merge-base A` is equivalent to -`git diff $(git merge-base A HEAD)`. +of <commit> and HEAD. `git diff --cached --merge-base A` is equivalent to +`git diff --cached $(git merge-base A HEAD)`. -'git diff' [<options>] <commit> [--] [<path>...]:: +'git diff' [<options>] [--merge-base] <commit> [--] [<path>...]:: This form is to view the changes you have in your working tree relative to the named <commit>. You can use HEAD to compare it with the latest commit, or a branch name to compare with the tip of a different branch. ++ +If --merge-base is given, instead of using <commit>, use the merge base +of <commit> and HEAD. `git diff --merge-base A` is equivalent to +`git diff $(git merge-base A HEAD)`. 'git diff' [<options>] [--merge-base] <commit> <commit> [--] [<path>...]:: diff --git a/Documentation/git-difftool.txt b/Documentation/git-difftool.txt index 484c485fd0..143b0c49d7 100644 --- a/Documentation/git-difftool.txt +++ b/Documentation/git-difftool.txt @@ -34,6 +34,14 @@ OPTIONS This is the default behaviour; the option is provided to override any configuration settings. +--rotate-to=<file>:: + Start showing the diff for the given path, + the paths before it will move to end and output. + +--skip-to=<file>:: + Start showing the diff for the given path, skipping all + the paths before it. + -t <tool>:: --tool=<tool>:: Use the diff tool specified by <tool>. Valid values include diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt index 9067c2079e..550c16ca61 100644 --- a/Documentation/git-fetch.txt +++ b/Documentation/git-fetch.txt @@ -133,7 +133,7 @@ remember to run that, set `fetch.prune` globally, or linkgit:git-config[1]. Here's where things get tricky and more specific. The pruning feature -doesn't actually care about branches, instead it'll prune local <-> +doesn't actually care about branches, instead it'll prune local <--> remote-references as a function of the refspec of the remote (see `<refspec>` and <<CRTB,CONFIGURED REMOTE-TRACKING BRANCHES>> above). diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt index 2962f85a50..6da899c629 100644 --- a/Documentation/git-for-each-ref.txt +++ b/Documentation/git-for-each-ref.txt @@ -235,6 +235,15 @@ and `date` to extract the named component. For email fields (`authoremail`, without angle brackets, and `:localpart` to get the part before the `@` symbol out of the trimmed email. +The raw data in an object is `raw`. + +raw:size:: + The raw data size of the object. + +Note that `--format=%(raw)` can not be used with `--python`, `--shell`, `--tcl`, +because such language may not support arbitrary binary data in their string +variable type. + The message in a commit or a tag object is `contents`, from which `contents:<part>` can be used to extract various parts out of: @@ -260,11 +269,9 @@ contents:lines=N:: The first `N` lines of the message. Additionally, the trailers as interpreted by linkgit:git-interpret-trailers[1] -are obtained as `trailers` (or by using the historical alias -`contents:trailers`). Non-trailer lines from the trailer block can be omitted -with `trailers:only`. Whitespace-continuations can be removed from trailers so -that each trailer appears on a line by itself with its full content with -`trailers:unfold`. Both can be used together as `trailers:unfold,only`. +are obtained as `trailers[:options]` (or by using the historical alias +`contents:trailers[:options]`). For valid [:option] values see `trailers` +section of linkgit:git-log[1]. For sorting purposes, fields with numeric values sort in numeric order (`objectsize`, `authordate`, `committerdate`, `creatordate`, `taggerdate`). diff --git a/Documentation/git-format-patch.txt b/Documentation/git-format-patch.txt index 3e49bf2210..fe2f69d36e 100644 --- a/Documentation/git-format-patch.txt +++ b/Documentation/git-format-patch.txt @@ -36,11 +36,28 @@ SYNOPSIS DESCRIPTION ----------- -Prepare each commit with its patch in -one file per commit, formatted to resemble UNIX mailbox format. +Prepare each non-merge commit with its "patch" in +one "message" per commit, formatted to resemble a UNIX mailbox. The output of this command is convenient for e-mail submission or for use with 'git am'. +A "message" generated by the command consists of three parts: + +* A brief metadata header that begins with `From <commit>` + with a fixed `Mon Sep 17 00:00:00 2001` datestamp to help programs + like "file(1)" to recognize that the file is an output from this + command, fields that record the author identity, the author date, + and the title of the change (taken from the first paragraph of the + commit log message). + +* The second and subsequent paragraphs of the commit log message. + +* The "patch", which is the "diff -p --stat" output (see + linkgit:git-diff[1]) between the commit and its parent. + +The log message and the patch is separated by a line with a +three-dash line. + There are two ways to specify which commits to operate on. 1. A single commit, <since>, specifies that the commits leading @@ -221,6 +238,11 @@ populated with placeholder text. `--subject-prefix` option) has ` v<n>` appended to it. E.g. `--reroll-count=4` may produce `v4-0001-add-makefile.patch` file that has "Subject: [PATCH v4 1/20] Add makefile" in it. + `<n>` does not have to be an integer (e.g. "--reroll-count=4.4", + or "--reroll-count=4rev2" are allowed), but the downside of + using such a reroll-count is that the range-diff/interdiff + with the previous version does not state exactly which + version the new interation is compared against. --to=<email>:: Add a `To:` header to the email headers. This is in addition @@ -718,6 +740,14 @@ use it only when you know the recipient uses Git to apply your patch. $ git format-patch -3 ------------ +CAVEATS +------- + +Note that `format-patch` will omit merge commits from the output, even +if they are part of the requested range. A simple "patch" does not +include enough information for the receiving end to reproduce the same +merge commit. + SEE ALSO -------- linkgit:git-am[1], linkgit:git-send-email[1] diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index 0c114ad1ca..853967dea0 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -117,12 +117,14 @@ NOTES 'git gc' tries very hard not to delete objects that are referenced anywhere in your repository. In particular, it will keep not only objects referenced by your current set of branches and tags, but also -objects referenced by the index, remote-tracking branches, notes saved -by 'git notes' under refs/notes/, reflogs (which may reference commits -in branches that were later amended or rewound), and anything else in -the refs/* namespace. If you are expecting some objects to be deleted -and they aren't, check all of those locations and decide whether it -makes sense in your case to remove those references. +objects referenced by the index, remote-tracking branches, reflogs +(which may reference commits in branches that were later amended or +rewound), and anything else in the refs/* namespace. Note that a note +(of the kind created by 'git notes') attached to an object does not +contribute in keeping the object alive. If you are expecting some +objects to be deleted and they aren't, check all of those locations +and decide whether it makes sense in your case to remove those +references. On the other hand, when 'git gc' runs concurrently with another process, there is a risk of it deleting an object that the other process is using diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index 4e0ba8234a..3d393fbac1 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -38,38 +38,6 @@ are lists of one or more search expressions separated by newline characters. An empty string as search expression matches all lines. -CONFIGURATION -------------- - -grep.lineNumber:: - If set to true, enable `-n` option by default. - -grep.column:: - If set to true, enable the `--column` option by default. - -grep.patternType:: - Set the default matching behavior. Using a value of 'basic', 'extended', - 'fixed', or 'perl' will enable the `--basic-regexp`, `--extended-regexp`, - `--fixed-strings`, or `--perl-regexp` option accordingly, while the - value 'default' will return to the default matching behavior. - -grep.extendedRegexp:: - If set to true, enable `--extended-regexp` option by default. This - option is ignored when the `grep.patternType` option is set to a value - other than 'default'. - -grep.threads:: - Number of grep worker threads to use. If unset (or set to 0), Git will - use as many threads as the number of logical cores available. - -grep.fullName:: - If set to true, enable `--full-name` option by default. - -grep.fallbackToNoIndex:: - If set to true, fall back to git grep --no-index if git grep - is executed outside of a git repository. Defaults to false. - - OPTIONS ------- --cached:: @@ -363,6 +331,38 @@ with multiple threads might perform slower than single threaded if `--textconv` is given and there're too many text conversions. So if you experience low performance in this case, it might be desirable to use `--threads=1`. +CONFIGURATION +------------- + +grep.lineNumber:: + If set to true, enable `-n` option by default. + +grep.column:: + If set to true, enable the `--column` option by default. + +grep.patternType:: + Set the default matching behavior. Using a value of 'basic', 'extended', + 'fixed', or 'perl' will enable the `--basic-regexp`, `--extended-regexp`, + `--fixed-strings`, or `--perl-regexp` option accordingly, while the + value 'default' will return to the default matching behavior. + +grep.extendedRegexp:: + If set to true, enable `--extended-regexp` option by default. This + option is ignored when the `grep.patternType` option is set to a value + other than 'default'. + +grep.threads:: + Number of grep worker threads to use. If unset (or set to 0), Git will + use as many threads as the number of logical cores available. + +grep.fullName:: + If set to true, enable `--full-name` option by default. + +grep.fallbackToNoIndex:: + If set to true, fall back to git grep --no-index if git grep + is executed outside of a git repository. Defaults to false. + + GIT --- Part of the linkgit:git[1] suite diff --git a/Documentation/git-http-backend.txt b/Documentation/git-http-backend.txt index 558966aa83..0c5c0dde19 100644 --- a/Documentation/git-http-backend.txt +++ b/Documentation/git-http-backend.txt @@ -16,7 +16,9 @@ A simple CGI program to serve the contents of a Git repository to Git clients accessing the repository over http:// and https:// protocols. The program supports clients fetching using both the smart HTTP protocol and the backwards-compatible dumb HTTP protocol, as well as clients -pushing using the smart HTTP protocol. +pushing using the smart HTTP protocol. It also supports Git's +more-efficient "v2" protocol if properly configured; see the +discussion of `GIT_PROTOCOL` in the ENVIRONMENT section below. It verifies that the directory has the magic file "git-daemon-export-ok", and it will refuse to export any Git directory @@ -77,6 +79,18 @@ Apache 2.x:: SetEnv GIT_PROJECT_ROOT /var/www/git SetEnv GIT_HTTP_EXPORT_ALL ScriptAlias /git/ /usr/libexec/git-core/git-http-backend/ + +# This is not strictly necessary using Apache and a modern version of +# git-http-backend, as the webserver will pass along the header in the +# environment as HTTP_GIT_PROTOCOL, and http-backend will copy that into +# GIT_PROTOCOL. But you may need this line (or something similar if you +# are using a different webserver), or if you want to support older Git +# versions that did not do that copying. +# +# Having the webserver set up GIT_PROTOCOL is perfectly fine even with +# modern versions (and will take precedence over HTTP_GIT_PROTOCOL, +# which means it can be used to override the client's request). +SetEnvIf Git-Protocol ".*" GIT_PROTOCOL=$0 ---------------------------------------------------------------- + To enable anonymous read access but authenticated write access, @@ -264,6 +278,16 @@ a repository with an extremely large number of refs. The value can be specified with a unit (e.g., `100M` for 100 megabytes). The default is 10 megabytes. +Clients may probe for optional protocol capabilities (like the v2 +protocol) using the `Git-Protocol` HTTP header. In order to support +these, the contents of that header must appear in the `GIT_PROTOCOL` +environment variable. Most webservers will pass this header to the CGI +via the `HTTP_GIT_PROTOCOL` variable, and `git-http-backend` will +automatically copy that to `GIT_PROTOCOL`. However, some webservers may +be more selective about which headers they'll pass, in which case they +need to be configured explicitly (see the mention of `Git-Protocol` in +the Apache config from the earlier EXAMPLES section). + The backend process sets GIT_COMMITTER_NAME to '$REMOTE_USER' and GIT_COMMITTER_EMAIL to '$\{REMOTE_USER}@http.$\{REMOTE_ADDR\}', ensuring that any reflogs created by 'git-receive-pack' contain some diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt index 4deb4893f5..9fa17b60e4 100644 --- a/Documentation/git-http-fetch.txt +++ b/Documentation/git-http-fetch.txt @@ -41,11 +41,17 @@ commit-id:: <commit-id>['\t'<filename-as-in--w>] --packfile=<hash>:: - Instead of a commit id on the command line (which is not expected in + For internal use only. Instead of a commit id on the command + line (which is not expected in this case), 'git http-fetch' fetches the packfile directly at the given URL and uses index-pack to generate corresponding .idx and .keep files. The hash is used to determine the name of the temporary file and is - arbitrary. The output of index-pack is printed to stdout. + arbitrary. The output of index-pack is printed to stdout. Requires + --index-pack-args. + +--index-pack-args=<args>:: + For internal use only. The command to run on the contents of the + downloaded pack. Arguments are URL-encoded separated by spaces. --recover:: Verify that everything reachable from target is fetched. Used after diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt index af0c26232c..1f1e359225 100644 --- a/Documentation/git-index-pack.txt +++ b/Documentation/git-index-pack.txt @@ -9,17 +9,18 @@ git-index-pack - Build pack index file for an existing packed archive SYNOPSIS -------- [verse] -'git index-pack' [-v] [-o <index-file>] <pack-file> +'git index-pack' [-v] [-o <index-file>] [--[no-]rev-index] <pack-file> 'git index-pack' --stdin [--fix-thin] [--keep] [-v] [-o <index-file>] - [<pack-file>] + [--[no-]rev-index] [<pack-file>] DESCRIPTION ----------- Reads a packed archive (.pack) from the specified file, and -builds a pack index file (.idx) for it. The packed archive -together with the pack index can then be placed in the -objects/pack/ directory of a Git repository. +builds a pack index file (.idx) for it. Optionally writes a +reverse-index (.rev) for the specified pack. The packed +archive together with the pack index can then be placed in +the objects/pack/ directory of a Git repository. OPTIONS @@ -35,6 +36,13 @@ OPTIONS fails if the name of packed archive does not end with .pack). +--[no-]rev-index:: + When this flag is provided, generate a reverse index + (a `.rev` file) corresponding to the given pack. If + `--verify` is given, ensure that the existing + reverse index is correct. Takes precedence over + `pack.writeReverseIndex`. + --stdin:: When this flag is provided, the pack is read from stdin instead and a copy is then written to <pack-file>. If @@ -74,11 +82,22 @@ OPTIONS --strict:: Die, if the pack contains broken objects or links. +--progress-title:: + For internal use only. ++ +Set the title of the progress bar. The title is "Receiving objects" by +default and "Indexing objects" when `--stdin` is specified. + --check-self-contained-and-connected:: Die if the pack contains broken links. For internal use only. --fsck-objects:: - Die if the pack contains broken objects. For internal use only. + For internal use only. ++ +Die if the pack contains broken objects. If the pack contains a tree +pointing to a .gitmodules blob that does not exist, prints the hash of +that blob (for the caller to check) after the hash that goes into the +name of the pack/idx file (see "Notes"). --threads=<n>:: Specifies the number of threads to spawn when resolving diff --git a/Documentation/git-interpret-trailers.txt b/Documentation/git-interpret-trailers.txt index 96ec6499f0..956a01d184 100644 --- a/Documentation/git-interpret-trailers.txt +++ b/Documentation/git-interpret-trailers.txt @@ -232,25 +232,38 @@ trailer.<token>.ifmissing:: that option for trailers with the specified <token>. trailer.<token>.command:: - This option can be used to specify a shell command that will - be called to automatically add or modify a trailer with the - specified <token>. + This option behaves in the same way as 'trailer.<token>.cmd', except + that it doesn't pass anything as argument to the specified command. + Instead the first occurrence of substring $ARG is replaced by the + value that would be passed as argument. + -When this option is specified, the behavior is as if a special -'<token>=<value>' argument were added at the beginning of the command -line, where <value> is taken to be the standard output of the -specified command with any leading and trailing whitespace trimmed -off. +The 'trailer.<token>.command' option has been deprecated in favor of +'trailer.<token>.cmd' due to the fact that $ARG in the user's command is +only replaced once and that the original way of replacing $ARG is not safe. + -If the command contains the `$ARG` string, this string will be -replaced with the <value> part of an existing trailer with the same -<token>, if any, before the command is launched. +When both 'trailer.<token>.cmd' and 'trailer.<token>.command' are given +for the same <token>, 'trailer.<token>.cmd' is used and +'trailer.<token>.command' is ignored. + +trailer.<token>.cmd:: + This option can be used to specify a shell command that will be called: + once to automatically add a trailer with the specified <token>, and then + each time a '--trailer <token>=<value>' argument to modify the <value> of + the trailer that this option would produce. + -If some '<token>=<value>' arguments are also passed on the command -line, when a 'trailer.<token>.command' is configured, the command will -also be executed for each of these arguments. And the <value> part of -these arguments, if any, will be used to replace the `$ARG` string in -the command. +When the specified command is first called to add a trailer +with the specified <token>, the behavior is as if a special +'--trailer <token>=<value>' argument was added at the beginning +of the "git interpret-trailers" command, where <value> +is taken to be the standard output of the command with any +leading and trailing whitespace trimmed off. ++ +If some '--trailer <token>=<value>' arguments are also passed +on the command line, the command is called again once for each +of these arguments with the same <token>. And the <value> part +of these arguments, if any, will be passed to the command as its +first argument. This way the command can produce a <value> computed +from the <value> passed in the '--trailer <token>=<value>' argument. EXAMPLES -------- @@ -333,6 +346,55 @@ subject Fix #42 ------------ +* Configure a 'help' trailer with a cmd use a script `glog-find-author` + which search specified author identity from git log in git repository + and show how it works: ++ +------------ +$ cat ~/bin/glog-find-author +#!/bin/sh +test -n "$1" && git log --author="$1" --pretty="%an <%ae>" -1 || true +$ git config trailer.help.key "Helped-by: " +$ git config trailer.help.ifExists "addIfDifferentNeighbor" +$ git config trailer.help.cmd "~/bin/glog-find-author" +$ git interpret-trailers --trailer="help:Junio" --trailer="help:Couder" <<EOF +> subject +> +> message +> +> EOF +subject + +message + +Helped-by: Junio C Hamano <gitster@pobox.com> +Helped-by: Christian Couder <christian.couder@gmail.com> +------------ + +* Configure a 'ref' trailer with a cmd use a script `glog-grep` + to grep last relevant commit from git log in the git repository + and show how it works: ++ +------------ +$ cat ~/bin/glog-grep +#!/bin/sh +test -n "$1" && git log --grep "$1" --pretty=reference -1 || true +$ git config trailer.ref.key "Reference-to: " +$ git config trailer.ref.ifExists "replace" +$ git config trailer.ref.cmd "~/bin/glog-grep" +$ git interpret-trailers --trailer="ref:Add copyright notices." <<EOF +> subject +> +> message +> +> EOF +subject + +message + +Reference-to: 8bc9a0c769 (Add copyright notices., 2005-04-07) +------------ + * Configure a 'see' trailer with a command to show the subject of a commit that is related, and show how it works: + diff --git a/Documentation/git-log.txt b/Documentation/git-log.txt index dd189a353a..0498e7bacb 100644 --- a/Documentation/git-log.txt +++ b/Documentation/git-log.txt @@ -39,7 +39,9 @@ OPTIONS full ref name (including prefix) will be printed. If 'auto' is specified, then if the output is going to a terminal, the ref names are shown as if 'short' were given, otherwise no ref names are - shown. The default option is 'short'. + shown. The option `--decorate` is short-hand for `--decorate=short`. + Default to configuration value of `log.decorate` if configured, + otherwise, `auto`. --decorate-refs=<pattern>:: --decorate-refs-exclude=<pattern>:: @@ -107,47 +109,15 @@ DIFF FORMATTING By default, `git log` does not generate any diff output. The options below can be used to show the changes made by each commit. -Note that unless one of `-c`, `--cc`, or `-m` is given, merge commits -will never show a diff, even if a diff format like `--patch` is -selected, nor will they match search options like `-S`. The exception is -when `--first-parent` is in use, in which merges are treated like normal -single-parent commits (this can be overridden by providing a -combined-diff option or with `--no-diff-merges`). - --c:: - With this option, diff output for a merge commit - shows the differences from each of the parents to the merge result - simultaneously instead of showing pairwise diff between a parent - and the result one at a time. Furthermore, it lists only files - which were modified from all parents. - ---cc:: - This flag implies the `-c` option and further compresses the - patch output by omitting uninteresting hunks whose contents in - the parents have only two variants and the merge result picks - one of them without modification. - ---combined-all-paths:: - This flag causes combined diffs (used for merge commits) to - list the name of the file from all parents. It thus only has - effect when -c or --cc are specified, and is likely only - useful if filename changes are detected (i.e. when either - rename or copy detection have been requested). - --m:: - This flag makes the merge commits show the full diff like - regular commits; for each merge parent, a separate log entry - and diff is generated. An exception is that only diff against - the first parent is shown when `--first-parent` option is given; - in that case, the output represents the changes the merge - brought _into_ the then-current branch. - ---diff-merges=off:: ---no-diff-merges:: - Disable output of diffs for merge commits (default). Useful to - override `-m`, `-c`, or `--cc`. +Note that unless one of `--diff-merges` variants (including short +`-m`, `-c`, and `--cc` options) is explicitly given, merge commits +will not show a diff, even if a diff format like `--patch` is +selected, nor will they match search options like `-S`. The exception +is when `--first-parent` is in use, in which case `first-parent` is +the default format. :git-log: 1 +:diff-merges-default: `off` include::diff-options.txt[] include::diff-generate-patch.txt[] diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt index 0a3b5265b3..6d11ab506b 100644 --- a/Documentation/git-ls-files.txt +++ b/Documentation/git-ls-files.txt @@ -13,6 +13,7 @@ SYNOPSIS (--[cached|deleted|others|ignored|stage|unmerged|killed|modified])* (-[c|d|o|i|s|u|k|m])* [--eol] + [--deduplicate] [-x <pattern>|--exclude=<pattern>] [-X <file>|--exclude-from=<file>] [--exclude-per-directory=<file>] @@ -80,6 +81,13 @@ OPTIONS \0 line termination on output and do not quote filenames. See OUTPUT below for more information. +--deduplicate:: + When only filenames are shown, suppress duplicates that may + come from having multiple stages during a merge, or giving + `--deleted` and `--modified` option at the same time. + When any of the `-t`, `--unmerged`, or `--stage` option is + in use, this option has no effect. + -x <pattern>:: --exclude=<pattern>:: Skip untracked files matching pattern. diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt index 7a6aed0e30..3fcfd965fd 100644 --- a/Documentation/git-mailinfo.txt +++ b/Documentation/git-mailinfo.txt @@ -9,7 +9,9 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message SYNOPSIS -------- [verse] -'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] [--[no-]scissors] <msg> <patch> +'git mailinfo' [-k|-b] [-u | --encoding=<encoding> | -n] + [--[no-]scissors] [--quoted-cr=<action>] + <msg> <patch> DESCRIPTION @@ -53,7 +55,7 @@ character. The commit log message, author name and author email are taken from the e-mail, and after minimally decoding MIME transfer encoding, re-coded in the charset specified by - i18n.commitencoding (defaulting to UTF-8) by transliterating + `i18n.commitEncoding` (defaulting to UTF-8) by transliterating them. This used to be optional but now it is the default. + Note that the patch is always used as-is without charset @@ -61,7 +63,7 @@ conversion, even with this flag. --encoding=<encoding>:: Similar to -u. But when re-coding, the charset specified here is - used instead of the one specified by i18n.commitencoding or UTF-8. + used instead of the one specified by `i18n.commitEncoding` or UTF-8. -n:: Disable all charset re-coding of the metadata. @@ -89,6 +91,23 @@ This can be enabled by default with the configuration option mailinfo.scissors. --no-scissors:: Ignore scissors lines. Useful for overriding mailinfo.scissors settings. +--quoted-cr=<action>:: + Action when processes email messages sent with base64 or + quoted-printable encoding, and the decoded lines end with a CRLF + instead of a simple LF. ++ +The valid actions are: ++ +-- +* `nowarn`: Git will do nothing when such a CRLF is found. +* `warn`: Git will issue a warning for each message if such a CRLF is + found. +* `strip`: Git will convert those CRLF to LF. +-- ++ +The default action could be set by configuration option `mailinfo.quotedCR`. +If no such configuration option has been set, `warn` will be used. + <msg>:: The commit log message extracted from e-mail, usually except the title line which comes from e-mail Subject. diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt index 6fe1e5e105..e2cfb68ab5 100644 --- a/Documentation/git-maintenance.txt +++ b/Documentation/git-maintenance.txt @@ -92,10 +92,8 @@ commit-graph:: prefetch:: The `prefetch` task updates the object directory with the latest objects from all registered remotes. For each remote, a `git fetch` - command is run. The refmap is custom to avoid updating local or remote - branches (those in `refs/heads` or `refs/remotes`). Instead, the - remote refs are stored in `refs/prefetch/<remote>/`. Also, tags are - not updated. + command is run. The configured refspec is modified to place all + requested refs within `refs/prefetch/`. Also, tags are not updated. + This is done to avoid disrupting the remote-tracking branches. The end users expect these refs to stay unmoved unless they initiate a fetch. With prefetch @@ -145,6 +143,12 @@ incremental-repack:: which is a special case that attempts to repack all pack-files into a single pack-file. +pack-refs:: + The `pack-refs` task collects the loose reference files and + collects them into a single file. This speeds up operations that + need to iterate across many references. See linkgit:git-pack-refs[1] + for more information. + OPTIONS ------- --auto:: @@ -175,6 +179,17 @@ OPTIONS `maintenance.<task>.enabled` configured as `true` are considered. See the 'TASKS' section for the list of accepted `<task>` values. +--scheduler=auto|crontab|systemd-timer|launchctl|schtasks:: + When combined with the `start` subcommand, specify the scheduler + for running the hourly, daily and weekly executions of + `git maintenance run`. + Possible values for `<scheduler>` are `auto`, `crontab` + (POSIX), `systemd-timer` (Linux), `launchctl` (macOS), and + `schtasks` (Windows). When `auto` is specified, the + appropriate platform-specific scheduler is used; on Linux, + `systemd-timer` is used if available, otherwise + `crontab`. Default is `auto`. + TROUBLESHOOTING --------------- @@ -218,6 +233,168 @@ Further, the `git gc` command should not be combined with but does not take the lock in the same way as `git maintenance run`. If possible, use `git maintenance run --task=gc` instead of `git gc`. +The following sections describe the mechanisms put in place to run +background maintenance by `git maintenance start` and how to customize +them. + +BACKGROUND MAINTENANCE ON POSIX SYSTEMS +--------------------------------------- + +The standard mechanism for scheduling background tasks on POSIX systems +is cron(8). This tool executes commands based on a given schedule. The +current list of user-scheduled tasks can be found by running `crontab -l`. +The schedule written by `git maintenance start` is similar to this: + +----------------------------------------------------------------------- +# BEGIN GIT MAINTENANCE SCHEDULE +# The following schedule was created by Git +# Any edits made in this region might be +# replaced in the future by a Git command. + +0 1-23 * * * "/<path>/git" --exec-path="/<path>" for-each-repo --config=maintenance.repo maintenance run --schedule=hourly +0 0 * * 1-6 "/<path>/git" --exec-path="/<path>" for-each-repo --config=maintenance.repo maintenance run --schedule=daily +0 0 * * 0 "/<path>/git" --exec-path="/<path>" for-each-repo --config=maintenance.repo maintenance run --schedule=weekly + +# END GIT MAINTENANCE SCHEDULE +----------------------------------------------------------------------- + +The comments are used as a region to mark the schedule as written by Git. +Any modifications within this region will be completely deleted by +`git maintenance stop` or overwritten by `git maintenance start`. + +The `crontab` entry specifies the full path of the `git` executable to +ensure that the executed `git` command is the same one with which +`git maintenance start` was issued independent of `PATH`. If the same user +runs `git maintenance start` with multiple Git executables, then only the +latest executable is used. + +These commands use `git for-each-repo --config=maintenance.repo` to run +`git maintenance run --schedule=<frequency>` on each repository listed in +the multi-valued `maintenance.repo` config option. These are typically +loaded from the user-specific global config. The `git maintenance` process +then determines which maintenance tasks are configured to run on each +repository with each `<frequency>` using the `maintenance.<task>.schedule` +config options. These values are loaded from the global or repository +config values. + +If the config values are insufficient to achieve your desired background +maintenance schedule, then you can create your own schedule. If you run +`crontab -e`, then an editor will load with your user-specific `cron` +schedule. In that editor, you can add your own schedule lines. You could +start by adapting the default schedule listed earlier, or you could read +the crontab(5) documentation for advanced scheduling techniques. Please +do use the full path and `--exec-path` techniques from the default +schedule to ensure you are executing the correct binaries in your +schedule. + + +BACKGROUND MAINTENANCE ON LINUX SYSTEMD SYSTEMS +----------------------------------------------- + +While Linux supports `cron`, depending on the distribution, `cron` may +be an optional package not necessarily installed. On modern Linux +distributions, systemd timers are superseding it. + +If user systemd timers are available, they will be used as a replacement +of `cron`. + +In this case, `git maintenance start` will create user systemd timer units +and start the timers. The current list of user-scheduled tasks can be found +by running `systemctl --user list-timers`. The timers written by `git +maintenance start` are similar to this: + +----------------------------------------------------------------------- +$ systemctl --user list-timers +NEXT LEFT LAST PASSED UNIT ACTIVATES +Thu 2021-04-29 19:00:00 CEST 42min left Thu 2021-04-29 18:00:11 CEST 17min ago git-maintenance@hourly.timer git-maintenance@hourly.service +Fri 2021-04-30 00:00:00 CEST 5h 42min left Thu 2021-04-29 00:00:11 CEST 18h ago git-maintenance@daily.timer git-maintenance@daily.service +Mon 2021-05-03 00:00:00 CEST 3 days left Mon 2021-04-26 00:00:11 CEST 3 days ago git-maintenance@weekly.timer git-maintenance@weekly.service +----------------------------------------------------------------------- + +One timer is registered for each `--schedule=<frequency>` option. + +The definition of the systemd units can be inspected in the following files: + +----------------------------------------------------------------------- +~/.config/systemd/user/git-maintenance@.timer +~/.config/systemd/user/git-maintenance@.service +~/.config/systemd/user/timers.target.wants/git-maintenance@hourly.timer +~/.config/systemd/user/timers.target.wants/git-maintenance@daily.timer +~/.config/systemd/user/timers.target.wants/git-maintenance@weekly.timer +----------------------------------------------------------------------- + +`git maintenance start` will overwrite these files and start the timer +again with `systemctl --user`, so any customization should be done by +creating a drop-in file, i.e. a `.conf` suffixed file in the +`~/.config/systemd/user/git-maintenance@.service.d` directory. + +`git maintenance stop` will stop the user systemd timers and delete +the above mentioned files. + +For more details, see `systemd.timer(5)`. + + +BACKGROUND MAINTENANCE ON MACOS SYSTEMS +--------------------------------------- + +While macOS technically supports `cron`, using `crontab -e` requires +elevated privileges and the executed process does not have a full user +context. Without a full user context, Git and its credential helpers +cannot access stored credentials, so some maintenance tasks are not +functional. + +Instead, `git maintenance start` interacts with the `launchctl` tool, +which is the recommended way to schedule timed jobs in macOS. Scheduling +maintenance through `git maintenance (start|stop)` requires some +`launchctl` features available only in macOS 10.11 or later. + +Your user-specific scheduled tasks are stored as XML-formatted `.plist` +files in `~/Library/LaunchAgents/`. You can see the currently-registered +tasks using the following command: + +----------------------------------------------------------------------- +$ ls ~/Library/LaunchAgents/org.git-scm.git* +org.git-scm.git.daily.plist +org.git-scm.git.hourly.plist +org.git-scm.git.weekly.plist +----------------------------------------------------------------------- + +One task is registered for each `--schedule=<frequency>` option. To +inspect how the XML format describes each schedule, open one of these +`.plist` files in an editor and inspect the `<array>` element following +the `<key>StartCalendarInterval</key>` element. + +`git maintenance start` will overwrite these files and register the +tasks again with `launchctl`, so any customizations should be done by +creating your own `.plist` files with distinct names. Similarly, the +`git maintenance stop` command will unregister the tasks with `launchctl` +and delete the `.plist` files. + +To create more advanced customizations to your background tasks, see +launchctl.plist(5) for more information. + + +BACKGROUND MAINTENANCE ON WINDOWS SYSTEMS +----------------------------------------- + +Windows does not support `cron` and instead has its own system for +scheduling background tasks. The `git maintenance start` command uses +the `schtasks` command to submit tasks to this system. You can inspect +all background tasks using the Task Scheduler application. The tasks +added by Git have names of the form `Git Maintenance (<frequency>)`. +The Task Scheduler GUI has ways to inspect these tasks, but you can also +export the tasks to XML files and view the details there. + +Note that since Git is a console application, these background tasks +create a console window visible to the current user. This can be changed +manually by selecting the "Run whether user is logged in or not" option +in Task Scheduler. This change requires a password input, which is why +`git maintenance start` does not select it by default. + +If you want to customize the background tasks, please rename the tasks +so future calls to `git maintenance (start|stop)` do not overwrite your +custom tasks. + GIT --- diff --git a/Documentation/git-merge.txt b/Documentation/git-merge.txt index 3819fadac1..e4f3352eb5 100644 --- a/Documentation/git-merge.txt +++ b/Documentation/git-merge.txt @@ -61,6 +61,8 @@ merge has resulted in conflicts. OPTIONS ------- +:git-merge: 1 + include::merge-options.txt[] -m <msg>:: diff --git a/Documentation/git-mergetool--lib.txt b/Documentation/git-mergetool--lib.txt index 4da9d24096..3e8f59ac0e 100644 --- a/Documentation/git-mergetool--lib.txt +++ b/Documentation/git-mergetool--lib.txt @@ -38,6 +38,10 @@ get_merge_tool_cmd:: get_merge_tool_path:: returns the custom path for a merge tool. +initialize_merge_tool:: + bring merge tool specific functions into scope so they can be used or + overridden. + run_merge_tool:: launches a merge tool given the tool name and a true/false flag to indicate whether a merge base is present. diff --git a/Documentation/git-mergetool.txt b/Documentation/git-mergetool.txt index 6b14702e78..e587c7763a 100644 --- a/Documentation/git-mergetool.txt +++ b/Documentation/git-mergetool.txt @@ -99,6 +99,10 @@ success of the resolution after the custom tool has exited. (see linkgit:git-config[1]). To cancel `diff.orderFile`, use `-O/dev/null`. +CONFIGURATION +------------- +include::config/mergetool.txt[] + TEMPORARY FILES --------------- `git mergetool` creates `*.orig` backup files while resolving merges. diff --git a/Documentation/git-mktag.txt b/Documentation/git-mktag.txt index fa6a756123..466a697519 100644 --- a/Documentation/git-mktag.txt +++ b/Documentation/git-mktag.txt @@ -3,7 +3,7 @@ git-mktag(1) NAME ---- -git-mktag - Creates a tag object +git-mktag - Creates a tag object with extra validation SYNOPSIS @@ -13,23 +13,50 @@ SYNOPSIS DESCRIPTION ----------- -Reads a tag contents on standard input and creates a tag object -that can also be used to sign other objects. -The output is the new tag's <object> identifier. +Reads a tag contents on standard input and creates a tag object. The +output is the new tag's <object> identifier. + +This command is mostly equivalent to linkgit:git-hash-object[1] +invoked with `-t tag -w --stdin`. I.e. both of these will create and +write a tag found in `my-tag`: + + git mktag <my-tag + git hash-object -t tag -w --stdin <my-tag + +The difference is that mktag will die before writing the tag if the +tag doesn't pass a linkgit:git-fsck[1] check. + +The "fsck" check done mktag is stricter than what linkgit:git-fsck[1] +would run by default in that all `fsck.<msg-id>` messages are promoted +from warnings to errors (so e.g. a missing "tagger" line is an error). + +Extra headers in the object are also an error under mktag, but ignored +by linkgit:git-fsck[1]. This extra check can be turned off by setting +the appropriate `fsck.<msg-id>` varible: + + git -c fsck.extraHeaderEntry=ignore mktag <my-tag-with-headers + +OPTIONS +------- + +--strict:: + By default mktag turns on the equivalent of + linkgit:git-fsck[1] `--strict` mode. Use `--no-strict` to + disable it. Tag Format ---------- A tag signature file, to be fed to this command's standard input, has a very simple fixed format: four lines of - object <sha1> + object <hash> type <typename> tag <tagname> tagger <tagger> followed by some 'optional' free-form message (some tags created -by older Git may not have `tagger` line). The message, when +by older Git may not have `tagger` line). The message, when it exists, is separated by a blank line from the header. The message part may contain a signature that Git itself doesn't care about, but that can be verified with gpg. diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index eb0caa0439..3b0b55cd75 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -9,7 +9,7 @@ git-multi-pack-index - Write and verify multi-pack-indexes SYNOPSIS -------- [verse] -'git multi-pack-index' [--object-dir=<dir>] [--[no-]progress] <subcommand> +'git multi-pack-index' [--object-dir=<dir>] [--[no-]bitmap] <sub-command> DESCRIPTION ----------- @@ -22,15 +22,30 @@ OPTIONS Use given directory for the location of Git objects. We check `<dir>/packs/multi-pack-index` for the current MIDX file, and `<dir>/packs` for the pack-files to index. ++ +`<dir>` must be an alternate of the current repository. --[no-]progress:: Turn progress on/off explicitly. If neither is specified, progress is - shown if standard error is connected to a terminal. + shown if standard error is connected to a terminal. Supported by + sub-commands `write`, `verify`, `expire`, and `repack. The following subcommands are available: write:: - Write a new MIDX file. + Write a new MIDX file. The following options are available for + the `write` sub-command: ++ +-- + --preferred-pack=<pack>:: + Optionally specify the tie-breaking pack used when + multiple packs contain the same object. `<pack>` must + contain at least one object. If not given, ties are + broken in favor of the pack with the lowest mtime. + + --[no-]bitmap:: + Control whether or not a multi-pack bitmap is written. +-- verify:: Verify the contents of the MIDX file. @@ -71,6 +86,13 @@ EXAMPLES $ git multi-pack-index write ----------------------------------------------- +* Write a MIDX file for the packfiles in the current .git folder with a +corresponding bitmap. ++ +------------------------------------------------------------- +$ git multi-pack-index write --preferred-pack=<pack> --bitmap +------------------------------------------------------------- + * Write a MIDX file for the packfiles in an alternate object store. + ----------------------------------------------- diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt index f89e68b424..38e5257b2a 100644 --- a/Documentation/git-p4.txt +++ b/Documentation/git-p4.txt @@ -762,3 +762,7 @@ IMPLEMENTATION DETAILS message indicating the p4 depot location and change number. This line is used by later 'git p4 sync' operations to know which p4 changes are new. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index 54d715ead1..dbfd1f9017 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -85,6 +85,16 @@ base-name:: reference was included in the resulting packfile. This can be useful to send new tags to native Git clients. +--stdin-packs:: + Read the basenames of packfiles (e.g., `pack-1234abcd.pack`) + from the standard input, instead of object names or revision + arguments. The resulting pack contains all objects listed in the + included packs (those not beginning with `^`), excluding any + objects listed in the excluded packs (beginning with `^`). ++ +Incompatible with `--revs`, or options that imply `--revs` (such as +`--all`), with the exception of `--unpacked`, which is compatible. + --window=<n>:: --depth=<n>:: These two options affect how the objects contained in @@ -118,10 +128,10 @@ depth is 4095. into multiple independent packfiles, each not larger than the given size. The size can be suffixed with "k", "m", or "g". The minimum size allowed is limited to 1 MiB. - This option - prevents the creation of a bitmap index. The default is unlimited, unless the config variable - `pack.packSizeLimit` is set. + `pack.packSizeLimit` is set. Note that this option may result in + a larger and slower repository; see the discussion in + `pack.packSizeLimit`. --honor-pack-keep:: This flag causes an object already in a local pack that @@ -400,6 +410,17 @@ Note that we pick a single island for each regex to go into, using "last one wins" ordering (which allows repo-specific config to take precedence over user-wide config, and so forth). + +CONFIGURATION +------------- + +Various configuration variables affect packing, see +linkgit:git-config[1] (search for "pack" and "delta"). + +Notably, delta compression is not used on objects larger than the +`core.bigFileThreshold` configuration variable and on files with the +attribute `delta` set to false. + SEE ALSO -------- linkgit:git-rev-list[1] diff --git a/Documentation/git-pull.txt b/Documentation/git-pull.txt index 5c3fb67c01..aef757ec89 100644 --- a/Documentation/git-pull.txt +++ b/Documentation/git-pull.txt @@ -15,14 +15,17 @@ SYNOPSIS DESCRIPTION ----------- -Incorporates changes from a remote repository into the current -branch. In its default mode, `git pull` is shorthand for -`git fetch` followed by `git merge FETCH_HEAD`. - -More precisely, 'git pull' runs 'git fetch' with the given -parameters and calls 'git merge' to merge the retrieved branch -heads into the current branch. -With `--rebase`, it runs 'git rebase' instead of 'git merge'. +Incorporates changes from a remote repository into the current branch. +If the current branch is behind the remote, then by default it will +fast-forward the current branch to match the remote. If the current +branch and the remote have diverged, the user needs to specify how to +reconcile the divergent branches with `--rebase` or `--no-rebase` (or +the corresponding configuration option in `pull.rebase`). + +More precisely, `git pull` runs `git fetch` with the given parameters +and then depending on configuration options or command line flags, +will call either `git rebase` or `git merge` to reconcile diverging +branches. <repository> should be the name of a remote repository as passed to linkgit:git-fetch[1]. <refspec> can name an @@ -117,7 +120,7 @@ When set to `preserve` (deprecated in favor of `merges`), rebase with the `--preserve-merges` option passed to `git rebase` so that locally created merge commits will not be flattened. + -When false, merge the current branch into the upstream branch. +When false, merge the upstream branch into the current branch. + When `interactive`, enable the interactive mode of rebase. + @@ -132,7 +135,7 @@ published that history already. Do *not* use this option unless you have read linkgit:git-rebase[1] carefully. --no-rebase:: - Override earlier --rebase. + This is shorthand for --rebase=false. Options related to fetching ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/git-push.txt b/Documentation/git-push.txt index ab103c82cf..2f25aa3a29 100644 --- a/Documentation/git-push.txt +++ b/Documentation/git-push.txt @@ -244,8 +244,8 @@ Imagine that you have to rebase what you have already published. You will have to bypass the "must fast-forward" rule in order to replace the history you originally published with the rebased history. If somebody else built on top of your original history while you are -rebasing, the tip of the branch at the remote may advance with her -commit, and blindly pushing with `--force` will lose her work. +rebasing, the tip of the branch at the remote may advance with their +commit, and blindly pushing with `--force` will lose their work. + This option allows you to say that you expect the history you are updating is what you rebased and want to replace. If the remote ref @@ -600,7 +600,7 @@ EXAMPLES `git push origin`:: Without additional configuration, pushes the current branch to - the configured upstream (`remote.origin.merge` configuration + the configured upstream (`branch.<name>.merge` configuration variable) if it has the same name as the current branch, and errors out without pushing otherwise. + diff --git a/Documentation/git-range-diff.txt b/Documentation/git-range-diff.txt index 9701c1e5fd..fe350d7f40 100644 --- a/Documentation/git-range-diff.txt +++ b/Documentation/git-range-diff.txt @@ -10,6 +10,7 @@ SYNOPSIS [verse] 'git range-diff' [--color=[<when>]] [--no-color] [<diff-options>] [--no-dual-color] [--creation-factor=<factor>] + [--left-only | --right-only] ( <range1> <range2> | <rev1>...<rev2> | <base> <rev1> <rev2> ) DESCRIPTION @@ -28,6 +29,17 @@ Finally, the list of matching commits is shown in the order of the second commit range, with unmatched commits being inserted just after all of their ancestors have been shown. +There are three ways to specify the commit ranges: + +- `<range1> <range2>`: Either commit range can be of the form + `<base>..<rev>`, `<rev>^!` or `<rev>^-<n>`. See `SPECIFYING RANGES` + in linkgit:gitrevisions[7] for more details. + +- `<rev1>...<rev2>`. This is equivalent to + `<rev2>..<rev1> <rev1>..<rev2>`. + +- `<base> <rev1> <rev2>`: This is equivalent to `<base>..<rev1> + <base>..<rev2>`. OPTIONS ------- @@ -57,6 +69,14 @@ to revert to color all lines according to the outer diff markers See the ``Algorithm`` section below for an explanation why this is needed. +--left-only:: + Suppress commits that are missing from the first specified range + (or the "left range" when using the `<rev1>...<rev2>` format). + +--right-only:: + Suppress commits that are missing from the second specified range + (or the "right range" when using the `<rev1>...<rev2>` format). + --[no-]notes[=<ref>]:: This flag is passed to the `git log` program (see linkgit:git-log[1]) that generates the patches. diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt index a0487b5cc5..506345cb0e 100644 --- a/Documentation/git-rebase.txt +++ b/Documentation/git-rebase.txt @@ -79,9 +79,10 @@ remain the checked-out branch. If the upstream branch already contains a change you have made (e.g., because you mailed a patch which was applied upstream), then that commit -will be skipped. For example, running `git rebase master` on the -following history (in which `A'` and `A` introduce the same set of changes, -but have different committer information): +will be skipped and warnings will be issued (if the `merge` backend is +used). For example, running `git rebase master` on the following +history (in which `A'` and `A` introduce the same set of changes, but +have different committer information): ------------ A---B---C topic @@ -200,12 +201,6 @@ Alternatively, you can undo the 'git rebase' with git rebase --abort -CONFIGURATION -------------- - -include::config/rebase.txt[] -include::config/sequencer.txt[] - OPTIONS ------- --onto <newbase>:: @@ -318,7 +313,10 @@ See also INCOMPATIBLE OPTIONS below. By default (or if `--no-reapply-cherry-picks` is given), these commits will be automatically dropped. Because this necessitates reading all upstream commits, this can be expensive in repos with a large number -of upstream commits that need to be read. +of upstream commits that need to be read. When using the `merge` +backend, warnings will be issued for each dropped commit (unless +`--quiet` is given). Advice will also be issued unless +`advice.skippedCherryPicks` is set to false (see linkgit:git-config[1]). + `--reapply-cherry-picks` allows rebase to forgo reading all upstream commits, potentially improving performance. @@ -346,9 +344,7 @@ See also INCOMPATIBLE OPTIONS below. -m:: --merge:: - Use merging strategies to rebase. When the recursive (default) merge - strategy is used, this allows rebase to be aware of renames on the - upstream side. This is the default. + Using merging strategies to rebase (default). + Note that a rebase merge works by replaying each commit from the working branch on top of the <upstream> branch. Because of this, when a merge @@ -360,9 +356,8 @@ See also INCOMPATIBLE OPTIONS below. -s <strategy>:: --strategy=<strategy>:: - Use the given merge strategy. - If there is no `-s` option 'git merge-recursive' is used - instead. This implies --merge. + Use the given merge strategy, instead of the default `ort`. + This implies `--merge`. + Because 'git rebase' replays each commit from the working branch on top of the <upstream> branch using the given strategy, using @@ -375,7 +370,7 @@ See also INCOMPATIBLE OPTIONS below. --strategy-option=<strategy-option>:: Pass the <strategy-option> through to the merge strategy. This implies `--merge` and, if no strategy has been - specified, `-s recursive`. Note the reversal of 'ours' and + specified, `-s ort`. Note the reversal of 'ours' and 'theirs' as noted above for the `-m` option. + See also INCOMPATIBLE OPTIONS below. @@ -536,7 +531,7 @@ The `--rebase-merges` mode is similar in spirit to the deprecated where commits can be reordered, inserted and dropped at will. + It is currently only possible to recreate the merge commits using the -`recursive` merge strategy; Different merge strategies can be used only via +`ort` merge strategy; different merge strategies can be used only via explicit `exec git merge -s <strategy> [...]` commands. + See also REBASING MERGES and INCOMPATIBLE OPTIONS below. @@ -593,16 +588,17 @@ See also INCOMPATIBLE OPTIONS below. --autosquash:: --no-autosquash:: - When the commit log message begins with "squash! ..." (or - "fixup! ..."), and there is already a commit in the todo list that - matches the same `...`, automatically modify the todo list of rebase - -i so that the commit marked for squashing comes right after the - commit to be modified, and change the action of the moved commit - from `pick` to `squash` (or `fixup`). A commit matches the `...` if - the commit subject matches, or if the `...` refers to the commit's - hash. As a fall-back, partial matches of the commit subject work, - too. The recommended way to create fixup/squash commits is by using - the `--fixup`/`--squash` options of linkgit:git-commit[1]. + When the commit log message begins with "squash! ..." or "fixup! ..." + or "amend! ...", and there is already a commit in the todo list that + matches the same `...`, automatically modify the todo list of + `rebase -i`, so that the commit marked for squashing comes right after + the commit to be modified, and change the action of the moved commit + from `pick` to `squash` or `fixup` or `fixup -C` respectively. A commit + matches the `...` if the commit subject matches, or if the `...` refers + to the commit's hash. As a fall-back, partial matches of the commit + subject work, too. The recommended way to create fixup/amend/squash + commits is by using the `--fixup`, `--fixup=amend:` or `--fixup=reword:` + and `--squash` options respectively of linkgit:git-commit[1]. + If the `--autosquash` option is enabled by default using the configuration variable `rebase.autoSquash`, this option can be @@ -622,6 +618,14 @@ See also INCOMPATIBLE OPTIONS below. --no-reschedule-failed-exec:: Automatically reschedule `exec` commands that failed. This only makes sense in interactive mode (or when an `--exec` option was provided). ++ +Even though this option applies once a rebase is started, it's set for +the whole rebase at the start based on either the +`rebase.rescheduleFailedExec` configuration (see linkgit:git-config[1] +or "CONFIGURATION" below) or whether this option is +provided. Otherwise an explicit `--no-reschedule-failed-exec` at the +start would be overridden by the presence of +`rebase.rescheduleFailedExec=true` configuration. INCOMPATIBLE OPTIONS -------------------- @@ -887,9 +891,17 @@ If you want to fold two or more commits into one, replace the command "pick" for the second and subsequent commits with "squash" or "fixup". If the commits had different authors, the folded commit will be attributed to the author of the first commit. The suggested commit -message for the folded commit is the concatenation of the commit -messages of the first commit and of those with the "squash" command, -but omits the commit messages of commits with the "fixup" command. +message for the folded commit is the concatenation of the first +commit's message with those identified by "squash" commands, omitting the +messages of commits identified by "fixup" commands, unless "fixup -c" +is used. In that case the suggested commit message is only the message +of the "fixup -c" commit, and an editor is opened allowing you to edit +the message. The contents (patch) of the "fixup -c" commit are still +incorporated into the folded commit. If there is more than one "fixup -c" +commit, the message from the final one is used. You can also use +"fixup -C" to get the same behavior as "fixup -c" except without opening +an editor. + 'git rebase' will stop when "pick" has been replaced with "edit" or when a command fails due to merge errors. When you are done editing @@ -1208,12 +1220,16 @@ successful merge so that the user can edit the message. If a `merge` command fails for any reason other than merge conflicts (i.e. when the merge operation did not even start), it is rescheduled immediately. -At this time, the `merge` command will *always* use the `recursive` -merge strategy for regular merges, and `octopus` for octopus merges, -with no way to choose a different one. To work around -this, an `exec` command can be used to call `git merge` explicitly, -using the fact that the labels are worktree-local refs (the ref -`refs/rewritten/onto` would correspond to the label `onto`, for example). +By default, the `merge` command will use the `ort` merge strategy for +regular merges, and `octopus` for octopus merges. One can specify a +default strategy for all merges using the `--strategy` argument when +invoking rebase, or can override specific merges in the interactive +list of commands by using an `exec` command to call `git merge` +explicitly with a `--strategy` argument. Note that when calling `git +merge` explicitly like this, you can make use of the fact that the +labels are worktree-local refs (the ref `refs/rewritten/onto` would +correspond to the label `onto`, for example) in order to refer to the +branches you want to merge. Note: the first command (`label onto`) labels the revision onto which the commits are rebased; The name `onto` is just a convention, as a nod @@ -1257,6 +1273,12 @@ merge tlsv1.3 merge cmake ------------ +CONFIGURATION +------------- + +include::config/rebase.txt[] +include::config/sequencer.txt[] + BUGS ---- The todo list presented by the deprecated `--preserve-merges --interactive` diff --git a/Documentation/git-receive-pack.txt b/Documentation/git-receive-pack.txt index 25702ed730..014a78409b 100644 --- a/Documentation/git-receive-pack.txt +++ b/Documentation/git-receive-pack.txt @@ -41,6 +41,11 @@ OPTIONS <directory>:: The repository to sync into. +--http-backend-info-refs:: + Used by linkgit:git-http-backend[1] to serve up + `$GIT_URL/info/refs?service=git-receive-pack` requests. See + `--http-backend-info-refs` in linkgit:git-upload-pack[1]. + PRE-RECEIVE HOOK ---------------- Before any ref is updated, if $GIT_DIR/hooks/pre-receive file exists diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index 92f146d27d..24c00c9384 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -121,7 +121,9 @@ depth is 4095. If specified, multiple packfiles may be created, which also prevents the creation of a bitmap index. The default is unlimited, unless the config variable - `pack.packSizeLimit` is set. + `pack.packSizeLimit` is set. Note that this option may result in + a larger and slower repository; see the discussion in + `pack.packSizeLimit`. -b:: --write-bitmap-index:: @@ -165,9 +167,35 @@ depth is 4095. Pass the `--delta-islands` option to `git-pack-objects`, see linkgit:git-pack-objects[1]. -Configuration +-g=<factor>:: +--geometric=<factor>:: + Arrange resulting pack structure so that each successive pack + contains at least `<factor>` times the number of objects as the + next-largest pack. ++ +`git repack` ensures this by determining a "cut" of packfiles that need +to be repacked into one in order to ensure a geometric progression. It +picks the smallest set of packfiles such that as many of the larger +packfiles (by count of objects contained in that pack) may be left +intact. ++ +Unlike other repack modes, the set of objects to pack is determined +uniquely by the set of packs being "rolled-up"; in other words, the +packs determined to need to be combined in order to restore a geometric +progression. ++ +When `--unpacked` is specified, loose objects are implicitly included in +this "roll-up", without respect to their reachability. This is subject +to change in the future. This option (implying a drastically different +repack mode) is not guaranteed to work with all other combinations of +option to `git repack`. + +CONFIGURATION ------------- +Various configuration variables affect packing, see +linkgit:git-config[1] (search for "pack" and "delta"). + By default, the command passes `--delta-base-offset` option to 'git pack-objects'; this typically results in slightly smaller packs, but the generated packs are incompatible with versions of Git older than @@ -178,6 +206,10 @@ need to set the configuration variable `repack.UseDeltaBaseOffset` to is unaffected by this option as the conversion is performed on the fly as needed in that case. +Delta compression is not used on objects larger than the +`core.bigFileThreshold` configuration variable and on files with the +attribute `delta` set to false. + SEE ALSO -------- linkgit:git-pack-objects[1] diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt index 5da66232dc..20bb8e8217 100644 --- a/Documentation/git-rev-list.txt +++ b/Documentation/git-rev-list.txt @@ -31,6 +31,99 @@ include::rev-list-options.txt[] include::pretty-formats.txt[] +EXAMPLES +-------- + +* Print the list of commits reachable from the current branch. ++ +---------- +git rev-list HEAD +---------- + +* Print the list of commits on this branch, but not present in the + upstream branch. ++ +---------- +git rev-list @{upstream}..HEAD +---------- + +* Format commits with their author and commit message (see also the + porcelain linkgit:git-log[1]). ++ +---------- +git rev-list --format=medium HEAD +---------- + +* Format commits along with their diffs (see also the porcelain + linkgit:git-log[1], which can do this in a single process). ++ +---------- +git rev-list HEAD | +git diff-tree --stdin --format=medium -p +---------- + +* Print the list of commits on the current branch that touched any + file in the `Documentation` directory. ++ +---------- +git rev-list HEAD -- Documentation/ +---------- + +* Print the list of commits authored by you in the past year, on + any branch, tag, or other ref. ++ +---------- +git rev-list --author=you@example.com --since=1.year.ago --all +---------- + +* Print the list of objects reachable from the current branch (i.e., all + commits and the blobs and trees they contain). ++ +---------- +git rev-list --objects HEAD +---------- + +* Compare the disk size of all reachable objects, versus those + reachable from reflogs, versus the total packed size. This can tell + you whether running `git repack -ad` might reduce the repository size + (by dropping unreachable objects), and whether expiring reflogs might + help. ++ +---------- +# reachable objects +git rev-list --disk-usage --objects --all +# plus reflogs +git rev-list --disk-usage --objects --all --reflog +# total disk size used +du -c .git/objects/pack/*.pack .git/objects/??/* +# alternative to du: add up "size" and "size-pack" fields +git count-objects -v +---------- + +* Report the disk size of each branch, not including objects used by the + current branch. This can find outliers that are contributing to a + bloated repository size (e.g., because somebody accidentally committed + large build artifacts). ++ +---------- +git for-each-ref --format='%(refname)' | +while read branch +do + size=$(git rev-list --disk-usage --objects HEAD..$branch) + echo "$size $branch" +done | +sort -n +---------- + +* Compare the on-disk size of branches in one group of refs, excluding + another. If you co-mingle objects from multiple remotes in a single + repository, this can show which remotes are contributing to the + repository size (taking the size of `origin` as a baseline). ++ +---------- +git rev-list --disk-usage --objects --remotes=$suspect --not --remotes=origin +---------- + GIT --- Part of the linkgit:git[1] suite diff --git a/Documentation/git-rev-parse.txt b/Documentation/git-rev-parse.txt index 5013daa6ef..6b8ca085aa 100644 --- a/Documentation/git-rev-parse.txt +++ b/Documentation/git-rev-parse.txt @@ -212,6 +212,18 @@ Options for Files Only the names of the variables are listed, not their value, even if they are set. +--path-format=(absolute|relative):: + Controls the behavior of certain other options. If specified as absolute, the + paths printed by those options will be absolute and canonical. If specified as + relative, the paths will be relative to the current working directory if that + is possible. The default is option specific. ++ +This option may be specified multiple times and affects only the arguments that +follow it on the command line, either to the end of the command line or the next +instance of this option. + +The following options are modified by `--path-format`: + --git-dir:: Show `$GIT_DIR` if defined. Otherwise show the path to the .git directory. The path shown, when relative, is @@ -221,27 +233,9 @@ If `$GIT_DIR` is not defined and the current directory is not detected to lie in a Git repository or work tree print a message to stderr and exit with nonzero status. ---absolute-git-dir:: - Like `--git-dir`, but its output is always the canonicalized - absolute path. - --git-common-dir:: Show `$GIT_COMMON_DIR` if defined, else `$GIT_DIR`. ---is-inside-git-dir:: - When the current working directory is below the repository - directory print "true", otherwise "false". - ---is-inside-work-tree:: - When the current working directory is inside the work tree of the - repository print "true", otherwise "false". - ---is-bare-repository:: - When the repository is bare print "true", otherwise "false". - ---is-shallow-repository:: - When the repository is shallow print "true", otherwise "false". - --resolve-git-dir <path>:: Check if <path> is a valid repository or a gitfile that points at a valid repository, and print the location of the @@ -255,19 +249,9 @@ print a message to stderr and exit with nonzero status. $GIT_OBJECT_DIRECTORY is set to /foo/bar then "git rev-parse --git-path objects/abc" returns /foo/bar/abc. ---show-cdup:: - When the command is invoked from a subdirectory, show the - path of the top-level directory relative to the current - directory (typically a sequence of "../", or an empty string). - ---show-prefix:: - When the command is invoked from a subdirectory, show the - path of the current directory relative to the top-level - directory. - --show-toplevel:: - Show the absolute path of the top-level directory of the working - tree. If there is no working tree, report an error. + Show the (by default, absolute) path of the top-level directory + of the working tree. If there is no working tree, report an error. --show-superproject-working-tree:: Show the absolute path of the root of the superproject's @@ -279,6 +263,36 @@ print a message to stderr and exit with nonzero status. Show the path to the shared index file in split index mode, or empty if not in split-index mode. +The following options are unaffected by `--path-format`: + +--absolute-git-dir:: + Like `--git-dir`, but its output is always the canonicalized + absolute path. + +--is-inside-git-dir:: + When the current working directory is below the repository + directory print "true", otherwise "false". + +--is-inside-work-tree:: + When the current working directory is inside the work tree of the + repository print "true", otherwise "false". + +--is-bare-repository:: + When the repository is bare print "true", otherwise "false". + +--is-shallow-repository:: + When the repository is shallow print "true", otherwise "false". + +--show-cdup:: + When the command is invoked from a subdirectory, show the + path of the top-level directory relative to the current + directory (typically a sequence of "../", or an empty string). + +--show-prefix:: + When the command is invoked from a subdirectory, show the + path of the current directory relative to the top-level + directory. + --show-object-format[=(storage|input|output)]:: Show the object format (hash algorithm) used for the repository for storage inside the `.git` directory, input, or output. For diff --git a/Documentation/git-rm.txt b/Documentation/git-rm.txt index ab750367fd..26e9b28470 100644 --- a/Documentation/git-rm.txt +++ b/Documentation/git-rm.txt @@ -23,7 +23,9 @@ branch, and no updates to their contents can be staged in the index, though that default behavior can be overridden with the `-f` option. When `--cached` is given, the staged content has to match either the tip of the branch or the file on disk, -allowing the file to be removed from just the index. +allowing the file to be removed from just the index. When +sparse-checkouts are in use (see linkgit:git-sparse-checkout[1]), +`git rm` will only remove paths within the sparse-checkout patterns. OPTIONS diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt index 93708aefea..3db4eab4ba 100644 --- a/Documentation/git-send-email.txt +++ b/Documentation/git-send-email.txt @@ -167,6 +167,14 @@ Sending `sendemail.envelopeSender` configuration variable; if that is unspecified, choosing the envelope sender is left to your MTA. +--sendmail-cmd=<command>:: + Specify a command to run to send the email. The command should + be sendmail-like; specifically, it must support the `-i` option. + The command will be executed in the shell if necessary. Default + is the value of `sendemail.sendmailcmd`. If unspecified, and if + --smtp-server is also unspecified, git-send-email will search + for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH. + --smtp-encryption=<encryption>:: Specify the encryption to use, either 'ssl' or 'tls'. Any other value reverts to plain SMTP. Default is the value of @@ -211,13 +219,16 @@ a password is obtained using 'git-credential'. --smtp-server=<host>:: If set, specifies the outgoing SMTP server to use (e.g. - `smtp.example.com` or a raw IP address). Alternatively it can - specify a full pathname of a sendmail-like program instead; - the program must support the `-i` option. Default value can - be specified by the `sendemail.smtpServer` configuration - option; the built-in default is to search for `sendmail` in - `/usr/sbin`, `/usr/lib` and $PATH if such program is - available, falling back to `localhost` otherwise. + `smtp.example.com` or a raw IP address). If unspecified, and if + `--sendmail-cmd` is also unspecified, the default is to search + for `sendmail` in `/usr/sbin`, `/usr/lib` and $PATH if such a + program is available, falling back to `localhost` otherwise. ++ +For backward compatibility, this option can also specify a full pathname +of a sendmail-like program instead; the program must support the `-i` +option. This method does not support passing arguments or using plain +command names. For those use cases, consider using `--sendmail-cmd` +instead. --smtp-server-port=<port>:: Specifies a port different from the default port (SMTP diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt index fd93cd41e9..c9c7f3065c 100644 --- a/Documentation/git-shortlog.txt +++ b/Documentation/git-shortlog.txt @@ -111,11 +111,11 @@ include::rev-list-options.txt[] MAPPING AUTHORS --------------- -The `.mailmap` feature is used to coalesce together commits by the same -person in the shortlog, where their name and/or email address was -spelled differently. +See linkgit:gitmailmap[5]. -include::mailmap.txt[] +Note that if `git shortlog` is run outside of a repository (to process +log contents on standard input), it will look for a `.mailmap` file in +the current directory. GIT --- diff --git a/Documentation/git-show.txt b/Documentation/git-show.txt index fcf528c1b3..2b1bc7288d 100644 --- a/Documentation/git-show.txt +++ b/Documentation/git-show.txt @@ -45,10 +45,13 @@ include::pretty-options.txt[] include::pretty-formats.txt[] -COMMON DIFF OPTIONS -------------------- +DIFF FORMATTING +--------------- +The options below can be used to change the way `git show` generates +diff output. :git-log: 1 +:diff-merges-default: `dense-combined` include::diff-options.txt[] include::diff-generate-patch.txt[] diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index a0eeaeb02e..42056ee9ff 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -45,6 +45,20 @@ To avoid interfering with other worktrees, it first enables the When `--cone` is provided, the `core.sparseCheckoutCone` setting is also set, allowing for better performance with a limited set of patterns (see 'CONE PATTERN SET' below). ++ +Use the `--[no-]sparse-index` option to toggle the use of the sparse +index format. This reduces the size of the index to be more closely +aligned with your sparse-checkout definition. This can have significant +performance advantages for commands such as `git status` or `git add`. +This feature is still experimental. Some commands might be slower with +a sparse index until they are properly integrated with the feature. ++ +**WARNING:** Using a sparse index requires modifying the index in a way +that is not completely understood by external tools. If you have trouble +with this compatibility, then run `git sparse-checkout init --no-sparse-index` +to rewrite your index to not be sparse. Older versions of Git will not +understand the sparse directory entries index extension and may fail to +interact with your repository until it is disabled. 'set':: Write a set of patterns to the sparse-checkout file, as given as @@ -196,6 +210,16 @@ case-insensitive check. This corrects for case mismatched filenames in the 'git sparse-checkout set' command to reflect the expected cone in the working directory. +When changing the sparse-checkout patterns in cone mode, Git will inspect each +tracked directory that is not within the sparse-checkout cone to see if it +contains any untracked files. If all of those files are ignored due to the +`.gitignore` patterns, then the directory will be deleted. If any of the +untracked files within that directory is not ignored, then no deletions will +occur within that directory and a warning message will appear. If these files +are important, then reset your sparse-checkout definition so they are included, +use `git add` and `git commit` to store them, then remove any remaining files +manually to ensure Git can behave optimally. + SUBMODULES ---------- diff --git a/Documentation/git-stash.txt b/Documentation/git-stash.txt index 31f1beb65b..be6084ccef 100644 --- a/Documentation/git-stash.txt +++ b/Documentation/git-stash.txt @@ -8,8 +8,8 @@ git-stash - Stash the changes in a dirty working directory away SYNOPSIS -------- [verse] -'git stash' list [<options>] -'git stash' show [<options>] [<stash>] +'git stash' list [<log-options>] +'git stash' show [-u|--include-untracked|--only-untracked] [<diff-options>] [<stash>] 'git stash' drop [-q|--quiet] [<stash>] 'git stash' ( pop | apply ) [--index] [-q|--quiet] [<stash>] 'git stash' branch <branchname> [<stash>] @@ -67,7 +67,7 @@ save [-p|--patch] [-k|--[no-]keep-index] [-u|--include-untracked] [-a|--all] [-q Instead, all non-option arguments are concatenated to form the stash message. -list [<options>]:: +list [<log-options>]:: List the stash entries that you currently have. Each 'stash entry' is listed with its name (e.g. `stash@{0}` is the latest entry, `stash@{1}` is @@ -83,7 +83,7 @@ stash@{1}: On master: 9cc0589... Add git-stash The command takes options applicable to the 'git log' command to control what is shown and how. See linkgit:git-log[1]. -show [<options>] [<stash>]:: +show [-u|--include-untracked|--only-untracked] [<diff-options>] [<stash>]:: Show the changes recorded in the stash entry as a diff between the stashed contents and the commit back when the stash entry was first @@ -91,8 +91,10 @@ show [<options>] [<stash>]:: By default, the command shows the diffstat, but it will accept any format known to 'git diff' (e.g., `git stash show -p stash@{1}` to view the second most recent entry in patch form). - You can use stash.showStat and/or stash.showPatch config variables - to change the default behavior. + If no `<diff-option>` is provided, the default behavior will be given + by the `stash.showStat`, and `stash.showPatch` config variables. You + can also use `stash.showIncludeUntracked` to set whether + `--include-untracked` is enabled by default. pop [--index] [-q|--quiet] [<stash>]:: @@ -160,10 +162,18 @@ up with `git clean`. -u:: --include-untracked:: - This option is only valid for `push` and `save` commands. +--no-include-untracked:: + When used with the `push` and `save` commands, + all untracked files are also stashed and then cleaned up with + `git clean`. ++ +When used with the `show` command, show the untracked files in the stash +entry as part of the diff. + +--only-untracked:: + This option is only valid for the `show` command. + -All untracked files are also stashed and then cleaned up with -`git clean`. +Show only the untracked files in the stash entry as part of the diff. --index:: This option is only valid for `pop` and `apply` commands. diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt index c0764e850a..83f38e3198 100644 --- a/Documentation/git-status.txt +++ b/Documentation/git-status.txt @@ -130,7 +130,7 @@ ignored, then the directory is not shown, but all contents are shown. --column[=<options>]:: --no-column:: Display untracked files in columns. See configuration variable - column.status for option syntax.`--column` and `--no-column` + `column.status` for option syntax. `--column` and `--no-column` without options are equivalent to 'always' and 'never' respectively. diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt index 67b143cc81..d5776ffcfd 100644 --- a/Documentation/git-svn.txt +++ b/Documentation/git-svn.txt @@ -1061,25 +1061,6 @@ with different name spaces. For example: branches = stable/*:refs/remotes/svn/stable/* branches = debug/*:refs/remotes/svn/debug/* -BUGS ----- - -We ignore all SVN properties except svn:executable. Any unhandled -properties are logged to $GIT_DIR/svn/<refname>/unhandled.log - -Renamed and copied directories are not detected by Git and hence not -tracked when committing to SVN. I do not plan on adding support for -this as it's quite difficult and time-consuming to get working for all -the possible corner cases (Git doesn't do it, either). Committing -renamed and copied files is fully supported if they're similar enough -for Git to detect them. - -In SVN, it is possible (though discouraged) to commit changes to a tag -(because a tag is just a directory copy, thus technically the same as a -branch). When cloning an SVN repository, 'git svn' cannot know if such a -commit to a tag will happen in the future. Thus it acts conservatively -and imports all SVN tags as branches, prefixing the tag name with 'tags/'. - CONFIGURATION ------------- @@ -1166,6 +1147,25 @@ $GIT_DIR/svn/\**/.rev_map.*:: if it is missing or not up to date. 'git svn reset' automatically rewinds it. +BUGS +---- + +We ignore all SVN properties except svn:executable. Any unhandled +properties are logged to $GIT_DIR/svn/<refname>/unhandled.log + +Renamed and copied directories are not detected by Git and hence not +tracked when committing to SVN. I do not plan on adding support for +this as it's quite difficult and time-consuming to get working for all +the possible corner cases (Git doesn't do it, either). Committing +renamed and copied files is fully supported if they're similar enough +for Git to detect them. + +In SVN, it is possible (though discouraged) to commit changes to a tag +(because a tag is just a directory copy, thus technically the same as a +branch). When cloning an SVN repository, 'git svn' cannot know if such a +commit to a tag will happen in the future. Thus it acts conservatively +and imports all SVN tags as branches, prefixing the tag name with 'tags/'. + SEE ALSO -------- linkgit:git-rebase[1] diff --git a/Documentation/git-tag.txt b/Documentation/git-tag.txt index 56656d1be6..31a97a1b6c 100644 --- a/Documentation/git-tag.txt +++ b/Documentation/git-tag.txt @@ -134,7 +134,7 @@ options for details. --column[=<options>]:: --no-column:: Display tag listing in columns. See configuration variable - column.tag for option syntax.`--column` and `--no-column` + `column.tag` for option syntax. `--column` and `--no-column` without options are equivalent to 'always' and 'never' respectively. + This option is only applicable when listing tags without annotation lines. diff --git a/Documentation/git-upload-pack.txt b/Documentation/git-upload-pack.txt index 9822c1eb1a..8f87b23ea8 100644 --- a/Documentation/git-upload-pack.txt +++ b/Documentation/git-upload-pack.txt @@ -36,14 +36,26 @@ OPTIONS This fits with the HTTP POST request processing model where a program may read the request, write a response, and must exit. ---advertise-refs:: - Only the initial ref advertisement is output, and the program exits - immediately. This fits with the HTTP GET request model, where - no request content is received but a response must be produced. +--http-backend-info-refs:: + Used by linkgit:git-http-backend[1] to serve up + `$GIT_URL/info/refs?service=git-upload-pack` requests. See + "Smart Clients" in link:technical/http-protocol.html[the HTTP + transfer protocols] documentation and "HTTP Transport" in + link:technical/protocol-v2.html[the Git Wire Protocol, Version + 2] documentation. Also understood by + linkgit:git-receive-pack[1]. <directory>:: The repository to sync from. +ENVIRONMENT +----------- + +`GIT_PROTOCOL`:: + Internal variable used for handshaking the wire protocol. Server + admins may need to configure some transports to allow this + variable to be passed. See the discussion in linkgit:git[1]. + SEE ALSO -------- linkgit:gitnamespaces[7] diff --git a/Documentation/git-version.txt b/Documentation/git-version.txt new file mode 100644 index 0000000000..80fa7754a6 --- /dev/null +++ b/Documentation/git-version.txt @@ -0,0 +1,28 @@ +git-version(1) +============== + +NAME +---- +git-version - Display version information about Git + +SYNOPSIS +-------- +[verse] +'git version' [--build-options] + +DESCRIPTION +----------- +With no options given, the version of 'git' is printed on the standard output. + +Note that `git --version` is identical to `git version` because the +former is internally converted into the latter. + +OPTIONS +------- +--build-options:: + Include additional information about how git was built for diagnostic + purposes. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/git-worktree.txt b/Documentation/git-worktree.txt index af06128cc9..8a7cbdd19c 100644 --- a/Documentation/git-worktree.txt +++ b/Documentation/git-worktree.txt @@ -9,7 +9,7 @@ git-worktree - Manage multiple working trees SYNOPSIS -------- [verse] -'git worktree add' [-f] [--detach] [--checkout] [--lock] [-b <new-branch>] <path> [<commit-ish>] +'git worktree add' [-f] [--detach] [--checkout] [--lock [--reason <string>]] [-b <new-branch>] <path> [<commit-ish>] 'git worktree list' [--porcelain] 'git worktree lock' [--reason <string>] <worktree> 'git worktree move' <worktree> <new-path> @@ -97,8 +97,9 @@ list:: List details of each working tree. The main working tree is listed first, followed by each of the linked working trees. The output details include whether the working tree is bare, the revision currently checked out, the -branch currently checked out (or "detached HEAD" if none), and "locked" if -the worktree is locked. +branch currently checked out (or "detached HEAD" if none), "locked" if +the worktree is locked, "prunable" if the worktree can be pruned by `prune` +command. lock:: @@ -143,6 +144,11 @@ locate it. Running `repair` within the recently-moved working tree will reestablish the connection. If multiple linked working trees are moved, running `repair` from any working tree with each tree's new `<path>` as an argument, will reestablish the connection to all the specified paths. ++ +If both the main working tree and linked working trees have been moved +manually, then running `repair` in the main working tree and specifying the +new `<path>` of each linked working tree will reestablish all connections +in both directions. unlock:: @@ -226,12 +232,17 @@ This can also be set up as the default behaviour by using the -v:: --verbose:: With `prune`, report all removals. ++ +With `list`, output additional information about worktrees (see below). --expire <time>:: With `prune`, only expire unused working trees older than `<time>`. ++ +With `list`, annotate missing working trees as prunable if they are +older than `<time>`. --reason <string>:: - With `lock`, an explanation why the working tree is locked. + With `lock` or with `add --lock`, an explanation why the working tree is locked. <worktree>:: Working trees can be identified by path, either relative or @@ -367,13 +378,46 @@ $ git worktree list /path/to/other-linked-worktree 1234abc (detached HEAD) ------------ +The command also shows annotations for each working tree, according to its state. +These annotations are: + + * `locked`, if the working tree is locked. + * `prunable`, if the working tree can be pruned via `git worktree prune`. + +------------ +$ git worktree list +/path/to/linked-worktree abcd1234 [master] +/path/to/locked-worktree acbd5678 (brancha) locked +/path/to/prunable-worktree 5678abc (detached HEAD) prunable +------------ + +For these annotations, a reason might also be available and this can be +seen using the verbose mode. The annotation is then moved to the next line +indented followed by the additional information. + +------------ +$ git worktree list --verbose +/path/to/linked-worktree abcd1234 [master] +/path/to/locked-worktree-no-reason abcd5678 (detached HEAD) locked +/path/to/locked-worktree-with-reason 1234abcd (brancha) + locked: working tree path is mounted on a portable device +/path/to/prunable-worktree 5678abc1 (detached HEAD) + prunable: gitdir file points to non-existent location +------------ + +Note that the annotation is moved to the next line if the additional +information is available, otherwise it stays on the same line as the +working tree itself. + Porcelain Format ~~~~~~~~~~~~~~~~ The porcelain format has a line per attribute. Attributes are listed with a label and value separated by a single space. Boolean attributes (like `bare` and `detached`) are listed as a label only, and are present only -if the value is true. The first attribute of a working tree is always -`worktree`, an empty line indicates the end of the record. For example: +if the value is true. Some attributes (like `locked`) can be listed as a label +only or with a value depending upon whether a reason is available. The first +attribute of a working tree is always `worktree`, an empty line indicates the +end of the record. For example: ------------ $ git worktree list --porcelain @@ -388,6 +432,33 @@ worktree /path/to/other-linked-worktree HEAD 1234abc1234abc1234abc1234abc1234abc1234a detached +worktree /path/to/linked-worktree-locked-no-reason +HEAD 5678abc5678abc5678abc5678abc5678abc5678c +branch refs/heads/locked-no-reason +locked + +worktree /path/to/linked-worktree-locked-with-reason +HEAD 3456def3456def3456def3456def3456def3456b +branch refs/heads/locked-with-reason +locked reason why is locked + +worktree /path/to/linked-worktree-prunable +HEAD 1233def1234def1234def1234def1234def1234b +detached +prunable gitdir file points to non-existent location + +------------ + +If the lock reason contains "unusual" characters such as newline, they +are escaped and the entire reason is quoted as explained for the +configuration variable `core.quotePath` (see linkgit:git-config[1]). +For Example: + +------------ +$ git worktree list --porcelain +... +locked "reason\nwhy is locked" +... ------------ EXAMPLES diff --git a/Documentation/git.txt b/Documentation/git.txt index a6d4ad0818..abace9eac2 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -13,7 +13,7 @@ SYNOPSIS [--exec-path[=<path>]] [--html-path] [--man-path] [--info-path] [-p|--paginate|-P|--no-pager] [--no-replace-objects] [--bare] [--git-dir=<path>] [--work-tree=<path>] [--namespace=<name>] - [--super-prefix=<path>] + [--super-prefix=<path>] [--config-env=<name>=<envvar>] <command> [<args>] DESCRIPTION @@ -41,6 +41,10 @@ OPTIONS ------- --version:: Prints the Git suite version that the 'git' program came from. ++ +This option is internaly converted to `git version ...` and accepts +the same options as the linkgit:git-version[1] command. If `--help` is +also given, it takes precedence over `--version`. --help:: Prints the synopsis and a list of the most commonly used @@ -80,6 +84,28 @@ config file). Including the equals but with an empty value (like `git -c foo.bar= ...`) sets `foo.bar` to the empty string which `git config --type=bool` will convert to `false`. +--config-env=<name>=<envvar>:: + Like `-c <name>=<value>`, give configuration variable + '<name>' a value, where <envvar> is the name of an + environment variable from which to retrieve the value. Unlike + `-c` there is no shortcut for directly setting the value to an + empty string, instead the environment variable itself must be + set to the empty string. It is an error if the `<envvar>` does not exist + in the environment. `<envvar>` may not contain an equals sign + to avoid ambiguity with `<name>` containing one. ++ +This is useful for cases where you want to pass transitory +configuration options to git, but are doing so on OS's where +other processes might be able to read your cmdline +(e.g. `/proc/self/cmdline`), but not your environ +(e.g. `/proc/self/environ`). That behavior is the default on +Linux, but may not be on your system. ++ +Note that this might add security for variables such as +`http.extraHeader` where the sensitive information is part of +the value, but not e.g. `url.<base>.insteadOf` where the +sensitive information can be part of the key. + --exec-path[=<path>]:: Path to wherever your core Git programs are installed. This can also be controlled by setting the GIT_EXEC_PATH @@ -648,6 +674,16 @@ for further details. If this environment variable is set to `0`, git will not prompt on the terminal (e.g., when asking for HTTP authentication). +`GIT_CONFIG_GLOBAL`:: +`GIT_CONFIG_SYSTEM`:: + Take the configuration from the given files instead from global or + system-level configuration files. If `GIT_CONFIG_SYSTEM` is set, the + system config file defined at build time (usually `/etc/gitconfig`) + will not be read. Likewise, if `GIT_CONFIG_GLOBAL` is set, neither + `$HOME/.gitconfig` nor `$XDG_CONFIG_HOME/git/config` will be read. Can + be set to `/dev/null` to skip reading configuration files of the + respective level. + `GIT_CONFIG_NOSYSTEM`:: Whether to skip reading settings from the system-wide `$(prefix)/etc/gitconfig` file. This environment variable can @@ -862,6 +898,21 @@ for full details. Contains a colon ':' separated list of keys with optional values 'key[=value]'. Presence of unknown keys and values must be ignored. ++ +Note that servers may need to be configured to allow this variable to +pass over some transports. It will be propagated automatically when +accessing local repositories (i.e., `file://` or a filesystem path), as +well as over the `git://` protocol. For git-over-http, it should work +automatically in most configurations, but see the discussion in +linkgit:git-http-backend[1]. For git-over-ssh, the ssh server may need +to be configured to allow clients to pass this variable (e.g., by using +`AcceptEnv GIT_PROTOCOL` with OpenSSH). ++ +This configuration is optional. If the variable is not propagated, then +clients will fall back to the original "v0" protocol (but may miss out +on some performance improvements or features). This variable currently +only affects clones and fetches; it is not yet used for pushes (but may +be in the future). `GIT_OPTIONAL_LOCKS`:: If set to `0`, Git will complete any requested operation without diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index e84e104f93..83fd4e19a4 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -845,6 +845,8 @@ patterns are available: - `rust` suitable for source code in the Rust language. +- `scheme` suitable for source code in the Scheme language. + - `tex` suitable for source code for LaTeX documents. @@ -1174,7 +1176,8 @@ tag then no replacement will be done. The placeholders are the same as those for the option `--pretty=format:` of linkgit:git-log[1], except that they need to be wrapped like this: `$Format:PLACEHOLDERS$` in the file. E.g. the string `$Format:%H$` will be replaced by the -commit hash. +commit hash. However, only one `%(describe)` placeholder is expanded +per archive to avoid denial-of-service attacks. Packing objects @@ -1244,6 +1247,12 @@ to: [attr]binary -diff -merge -text ------------ +NOTES +----- + +Git does not follow symbolic links when accessing a `.gitattributes` +file in the working tree. This keeps behavior consistent when the file +is accessed from the index or a tree versus from the filesystem. EXAMPLES -------- diff --git a/Documentation/gitdiffcore.txt b/Documentation/gitdiffcore.txt index c970d9fe43..0d57f86abc 100644 --- a/Documentation/gitdiffcore.txt +++ b/Documentation/gitdiffcore.txt @@ -74,6 +74,7 @@ into another list. There are currently 5 such transformations: - diffcore-merge-broken - diffcore-pickaxe - diffcore-order +- diffcore-rotate These are applied in sequence. The set of filepairs 'git diff-{asterisk}' commands find are used as the input to diffcore-break, and @@ -168,6 +169,26 @@ a similarity score different from the default of 50% by giving a number after the "-M" or "-C" option (e.g. "-M8" to tell it to use 8/10 = 80%). +Note that when rename detection is on but both copy and break +detection are off, rename detection adds a preliminary step that first +checks if files are moved across directories while keeping their +filename the same. If there is a file added to a directory whose +contents is sufficiently similar to a file with the same name that got +deleted from a different directory, it will mark them as renames and +exclude them from the later quadratic step (the one that pairwise +compares all unmatched files to find the "best" matches, determined by +the highest content similarity). So, for example, if a deleted +docs/ext.txt and an added docs/config/ext.txt are similar enough, they +will be marked as a rename and prevent an added docs/ext.md that may +be even more similar to the deleted docs/ext.txt from being considered +as the rename destination in the later step. For this reason, the +preliminary "match same filename" step uses a bit higher threshold to +mark a file pair as a rename and stop considering other candidates for +better matches. At most, one comparison is done per file in this +preliminary pass; so if there are several remaining ext.txt files +throughout the directory hierarchy after exact rename detection, this +preliminary step may be skipped for those files. + Note. When the "-C" option is used with `--find-copies-harder` option, 'git diff-{asterisk}' commands feed unmodified filepairs to diffcore mechanism as well as modified ones. This lets the copy @@ -276,6 +297,26 @@ Documentation t ------------------------------------------------ +diffcore-rotate: For Changing At Which Path Output Starts +--------------------------------------------------------- + +This transformation takes one pathname, and rotates the set of +filepairs so that the filepair for the given pathname comes first, +optionally discarding the paths that come before it. This is used +to implement the `--skip-to` and the `--rotate-to` options. It is +an error when the specified pathname is not in the set of filepairs, +but it is not useful to error out when used with "git log" family of +commands, because it is unreasonable to expect that a given path +would be modified by each and every commit shown by the "git log" +command. For this reason, when used with "git log", the filepair +that sorts the same as, or the first one that sorts after, the given +pathname is where the output starts. + +Use of this transformation combined with diffcore-order will produce +unexpected results, as the input to this transformation is likely +not sorted when diffcore-order is in effect. + + SEE ALSO -------- linkgit:git-diff[1], diff --git a/Documentation/gitfaq.txt b/Documentation/gitfaq.txt index afdaeab850..8c1f2d5675 100644 --- a/Documentation/gitfaq.txt +++ b/Documentation/gitfaq.txt @@ -275,7 +275,7 @@ best to always use a regular merge commit. [[merge-two-revert-one]] If I make a change on two branches but revert it on one, why does the merge of those branches include the change?:: - By default, when Git does a merge, it uses a strategy called the recursive + By default, when Git does a merge, it uses a strategy called the `ort` strategy, which does a fancy three-way merge. In such a case, when Git performs the merge, it considers exactly three points: the two heads and a third point, called the _merge base_, which is usually the common ancestor of diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index 1f3b57d04d..b51959ff94 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -138,7 +138,7 @@ given); `template` (if a `-t` option was given or the configuration option `commit.template` is set); `merge` (if the commit is a merge or a `.git/MERGE_MSG` file exists); `squash` (if a `.git/SQUASH_MSG` file exists); or `commit`, followed by -a commit SHA-1 (if a `-c`, `-C` or `--amend` option was given). +a commit object name (if a `-c`, `-C` or `--amend` option was given). If the exit status is non-zero, `git commit` will abort. @@ -231,19 +231,19 @@ named remote is not being used both values will be the same. Information about what is to be pushed is provided on the hook's standard input with lines of the form: - <local ref> SP <local sha1> SP <remote ref> SP <remote sha1> LF + <local ref> SP <local object name> SP <remote ref> SP <remote object name> LF For instance, if the command +git push origin master:foreign+ were run the hook would receive a line like the following: refs/heads/master 67890 refs/heads/foreign 12345 -although the full, 40-character SHA-1s would be supplied. If the foreign ref -does not yet exist the `<remote SHA-1>` will be 40 `0`. If a ref is to be -deleted, the `<local ref>` will be supplied as `(delete)` and the `<local -SHA-1>` will be 40 `0`. If the local commit was specified by something other -than a name which could be expanded (such as `HEAD~`, or a SHA-1) it will be -supplied as it was originally given. +although the full object name would be supplied. If the foreign ref does not +yet exist the `<remote object name>` will be the all-zeroes object name. If a +ref is to be deleted, the `<local ref>` will be supplied as `(delete)` and the +`<local object name>` will be the all-zeroes object name. If the local commit +was specified by something other than a name which could be expanded (such as +`HEAD~`, or an object name) it will be supplied as it was originally given. If this hook exits with a non-zero status, `git push` will abort without pushing anything. Information about why the push is rejected may be sent @@ -268,7 +268,7 @@ input a line of the format: where `<old-value>` is the old object name stored in the ref, `<new-value>` is the new object name to be stored in the ref and `<ref-name>` is the full name of the ref. -When creating a new ref, `<old-value>` is 40 `0`. +When creating a new ref, `<old-value>` is the all-zeroes object name. If the hook exits with non-zero status, none of the refs will be updated. If the hook exits with zero, updating of individual refs can @@ -473,7 +473,8 @@ reference-transaction This hook is invoked by any Git command that performs reference updates. It executes whenever a reference transaction is prepared, -committed or aborted and may thus get called multiple times. +committed or aborted and may thus get called multiple times. The hook +does not cover symbolic references (but that may change in the future). The hook takes exactly one argument, which is the current state the given reference transaction is in: @@ -492,6 +493,14 @@ receives on standard input a line of the format: <old-value> SP <new-value> SP <ref-name> LF +where `<old-value>` is the old object name passed into the reference +transaction, `<new-value>` is the new object name to be stored in the +ref and `<ref-name>` is the full name of the ref. When force updating +the reference regardless of its current value or when the reference is +to be created anew, `<old-value>` is the all-zeroes object name. To +distinguish these cases, you can inspect the current value of +`<ref-name>` via `git rev-parse`. + The exit status of the hook is ignored for any state except for the "prepared" state. In the "prepared" state, a non-zero exit status will cause the transaction to be aborted. The hook will not be called with @@ -550,7 +559,7 @@ command-dependent arguments may be passed in the future. The hook receives a list of the rewritten commits on stdin, in the format - <old-sha1> SP <new-sha1> [ SP <extra-info> ] LF + <old-object-name> SP <new-object-name> [ SP <extra-info> ] LF The 'extra-info' is again command-dependent. If it is empty, the preceding SP is also omitted. Currently, no commands pass any @@ -566,7 +575,7 @@ rebase:: For the 'squash' and 'fixup' operation, all commits that were squashed are listed as being rewritten to the squashed commit. This means that there will be several lines sharing the same - 'new-sha1'. + 'new-object-name'. + The commits are guaranteed to be listed in the order that they were processed by rebase. diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index d47b1ae296..f8a1fc2014 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -27,12 +27,11 @@ precedence, the last matching pattern decides the outcome): them. * Patterns read from a `.gitignore` file in the same directory - as the path, or in any parent directory, with patterns in the - higher level files (up to the toplevel of the work tree) being overridden - by those in lower level files down to the directory containing the file. - These patterns match relative to the location of the - `.gitignore` file. A project normally includes such - `.gitignore` files in its repository, containing patterns for + as the path, or in any parent directory (up to the top-level of the working + tree), with patterns in the higher level files being overridden by those in + lower level files down to the directory containing the file. These patterns + match relative to the location of the `.gitignore` file. A project normally + includes such `.gitignore` files in its repository, containing patterns for files generated as part of the project build. * Patterns read from `$GIT_DIR/info/exclude`. @@ -149,11 +148,15 @@ not tracked by Git remain untracked. To stop tracking a file that is currently tracked, use 'git rm --cached'. +Git does not follow symbolic links when accessing a `.gitignore` file in +the working tree. This keeps behavior consistent when the file is +accessed from the index or a tree versus from the filesystem. + EXAMPLES -------- - The pattern `hello.*` matches any file or folder - whose name begins with `hello`. If one wants to restrict + whose name begins with `hello.`. If one wants to restrict this only to the directory and not in its subdirectories, one can prepend the pattern with a slash, i.e. `/hello.*`; the pattern now matches `hello.txt`, `hello.c` but not diff --git a/Documentation/gitmailmap.txt b/Documentation/gitmailmap.txt new file mode 100644 index 0000000000..06f4af93fe --- /dev/null +++ b/Documentation/gitmailmap.txt @@ -0,0 +1,130 @@ +gitmailmap(5) +============= + +NAME +---- +gitmailmap - Map author/committer names and/or E-Mail addresses + +SYNOPSIS +-------- +$GIT_WORK_TREE/.mailmap + + +DESCRIPTION +----------- + +If the file `.mailmap` exists at the toplevel of the repository, or at +the location pointed to by the `mailmap.file` or `mailmap.blob` +configuration options (see linkgit:git-config[1]), it +is used to map author and committer names and email addresses to +canonical real names and email addresses. + + +SYNTAX +------ + +The '#' character begins a comment to the end of line, blank lines +are ignored. + +In the simple form, each line in the file consists of the canonical +real name of an author, whitespace, and an email address used in the +commit (enclosed by '<' and '>') to map to the name. For example: +-- + Proper Name <commit@email.xx> +-- + +The more complex forms are: +-- + <proper@email.xx> <commit@email.xx> +-- +which allows mailmap to replace only the email part of a commit, and: +-- + Proper Name <proper@email.xx> <commit@email.xx> +-- +which allows mailmap to replace both the name and the email of a +commit matching the specified commit email address, and: +-- + Proper Name <proper@email.xx> Commit Name <commit@email.xx> +-- +which allows mailmap to replace both the name and the email of a +commit matching both the specified commit name and email address. + +Both E-Mails and names are matched case-insensitively. For example +this would also match the 'Commit Name <commit@email.xx>' above: +-- + Proper Name <proper@email.xx> CoMmIt NaMe <CoMmIt@EmAiL.xX> +-- + +NOTES +----- + +Git does not follow symbolic links when accessing a `.mailmap` file in +the working tree. This keeps behavior consistent when the file is +accessed from the index or a tree versus from the filesystem. + +EXAMPLES +-------- + +Your history contains commits by two authors, Jane +and Joe, whose names appear in the repository under several forms: + +------------ +Joe Developer <joe@example.com> +Joe R. Developer <joe@example.com> +Jane Doe <jane@example.com> +Jane Doe <jane@laptop.(none)> +Jane D. <jane@desktop.(none)> +------------ + +Now suppose that Joe wants his middle name initial used, and Jane +prefers her family name fully spelled out. A `.mailmap` file to +correct the names would look like: + +------------ +Joe R. Developer <joe@example.com> +Jane Doe <jane@example.com> +Jane Doe <jane@desktop.(none)> +------------ + +Note that there's no need to map the name for '<jane@laptop.(none)>' to +only correct the names. However, leaving the obviously broken +'<jane@laptop.(none)>' and '<jane@desktop.(none)>' E-Mails as-is is +usually not what you want. A `.mailmap` file which also corrects those +is: + +------------ +Joe R. Developer <joe@example.com> +Jane Doe <jane@example.com> <jane@laptop.(none)> +Jane Doe <jane@example.com> <jane@desktop.(none)> +------------ + +Finally, let's say that Joe and Jane shared an E-Mail address, but not +a name, e.g. by having these two commits in the history generated by a +bug reporting system. I.e. names appearing in history as: + +------------ +Joe <bugs@example.com> +Jane <bugs@example.com> +------------ + +A full `.mailmap` file which also handles those cases (an addition of +two lines to the above example) would be: + +------------ +Joe R. Developer <joe@example.com> +Jane Doe <jane@example.com> <jane@laptop.(none)> +Jane Doe <jane@example.com> <jane@desktop.(none)> +Joe R. Developer <joe@example.com> Joe <bugs@example.com> +Jane Doe <jane@example.com> Jane <bugs@example.com> +------------ + + + +SEE ALSO +-------- +linkgit:git-check-mailmap[1] + + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/gitmodules.txt b/Documentation/gitmodules.txt index 8e333dde1b..dcee09b500 100644 --- a/Documentation/gitmodules.txt +++ b/Documentation/gitmodules.txt @@ -98,6 +98,14 @@ submodule.<name>.shallow:: shallow clone (with a history depth of 1) unless the user explicitly asks for a non-shallow clone. +NOTES +----- + +Git does not allow the `.gitmodules` file within a working tree to be a +symbolic link, and will refuse to check out such a tree entry. This +keeps behavior consistent when the file is accessed from the index or a +tree versus from the filesystem, and helps Git reliably enforce security +checks of the file contents. EXAMPLES -------- diff --git a/Documentation/gitnamespaces.txt b/Documentation/gitnamespaces.txt index b614969ad2..1c8d2ecc35 100644 --- a/Documentation/gitnamespaces.txt +++ b/Documentation/gitnamespaces.txt @@ -62,3 +62,7 @@ git clone ext::'git --namespace=foo %s /tmp/prefixed.git' ---------- include::transfer-data-leaks.txt[] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/gittutorial.txt b/Documentation/gittutorial.txt index 59ef5cef1f..0e0b863105 100644 --- a/Documentation/gittutorial.txt +++ b/Documentation/gittutorial.txt @@ -322,7 +322,7 @@ initiating this "pull". If Bob's work conflicts with what Alice did since their histories forked, Alice will use her working tree and the index to resolve conflicts, and existing local changes will interfere with the conflict resolution process (Git will still perform the fetch but will -refuse to merge --- Alice will have to get rid of her local changes in +refuse to merge -- Alice will have to get rid of her local changes in some way and pull again when this happens). Alice can peek at what Bob did without merging first, using the "fetch" diff --git a/Documentation/gitweb.conf.txt b/Documentation/gitweb.conf.txt index 7963a79ba9..34b1d6e224 100644 --- a/Documentation/gitweb.conf.txt +++ b/Documentation/gitweb.conf.txt @@ -751,6 +751,17 @@ default font sizes or lineheights are changed (e.g. via adding extra CSS stylesheet in `@stylesheets`), it may be appropriate to change these values. +email-privacy:: + Redact e-mail addresses from the generated HTML, etc. content. + This obscures e-mail addresses retrieved from the author/committer + and comment sections of the Git log. + It is meant to hinder web crawlers that harvest and abuse addresses. + Such crawlers may not respect robots.txt. + Note that users and user tools also see the addresses as redacted. + If Gitweb is not the final step in a workflow then subsequent steps + may misbehave because of the redacted information they receive. + Disabled by default. + highlight:: Server-side syntax highlight support in "blob" view. It requires `$highlight_bin` program to be available (see the description of diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt index 67c7a50b96..c077971335 100644 --- a/Documentation/glossary-content.txt +++ b/Documentation/glossary-content.txt @@ -146,8 +146,8 @@ current branch integrates with) obviously do not work, as there is no <<def_revision,revision>> and you are "merging" another <<def_branch,branch>>'s changes that happen to be a descendant of what you have. In such a case, you do not make a new <<def_merge,merge>> - <<def_commit,commit>> but instead just update to his - revision. This will happen frequently on a + <<def_commit,commit>> but instead just update your branch to point at the same + revision as the branch you are merging. This will happen frequently on a <<def_remote_tracking_branch,remote-tracking branch>> of a remote <<def_repository,repository>>. diff --git a/Documentation/howto/coordinate-embargoed-releases.txt b/Documentation/howto/coordinate-embargoed-releases.txt new file mode 100644 index 0000000000..601aae88e9 --- /dev/null +++ b/Documentation/howto/coordinate-embargoed-releases.txt @@ -0,0 +1,131 @@ +Content-type: text/asciidoc +Abstract: When a critical vulnerability is discovered and fixed, we follow this + script to coordinate a public release. + +How we coordinate embargoed releases +==================================== + +To protect Git users from critical vulnerabilities, we do not just release +fixed versions like regular maintenance releases. Instead, we coordinate +releases with packagers, keeping the fixes under an embargo until the release +date. That way, users will have a chance to upgrade on that date, no matter +what Operating System or distribution they run. + +Open a Security Advisory draft +------------------------------ + +The first step is to https://github.com/git/git/security/advisories/new[open an +advisory]. Technically, it is not necessary, but it is convenient and saves a +bit of hassle. This advisory can also be used to obtain the CVE number and it +will give us a private fork associated with it that can be used to collaborate +on a fix. + +Release date of the embargoed version +------------------------------------- + +If the vulnerability affects Windows users, we want to have our friends over at +Visual Studio on board. This means we need to target a "Patch Tuesday" (i.e. a +second Tuesday of the month), at the minimum three weeks from heads-up to +coordinated release. + +If the vulnerability affects the server side, or can benefit from scans on the +server side (i.e. if `git fsck` can detect an attack), it is important to give +all involved Git repository hosting sites enough time to scan all of those +repositories. + +Notifying the Linux distributions +--------------------------------- + +At most two weeks before release date, we need to send a notification to +distros@vs.openwall.org, preferably less than 7 days before the release date. +This will reach most (all?) Linux distributions. See an example below, and the +guidelines for this mailing list at +https://oss-security.openwall.org/wiki/mailing-lists/distros#how-to-use-the-lists[here]. + +Once the version has been published, we send a note about that to oss-security. +As an example, see https://www.openwall.com/lists/oss-security/2019/12/13/1[the +v2.24.1 mail]; +https://oss-security.openwall.org/wiki/mailing-lists/oss-security[Here] are +their guidelines. + +The mail to oss-security should also describe the exploit, and give credit to +the reporter(s): security researchers still receive too little respect for the +invaluable service they provide, and public credit goes a long way to keep them +paid by their respective organizations. + +Technically, describing any exploit can be delayed up to 7 days, but we usually +refrain from doing that, including it right away. + +As a courtesy we typically attach a Git bundle (as `.tar.xz` because the list +will drop `.bundle` attachments) in the mail to distros@ so that the involved +parties can take care of integrating/backporting them. This bundle is typically +created using a command like this: + + git bundle create cve-xxx.bundle ^origin/master vA.B.C vD.E.F + tar cJvf cve-xxx.bundle.tar.xz cve-xxx.bundle + +Example mail to distros@vs.openwall.org +--------------------------------------- + +.... +To: distros@vs.openwall.org +Cc: git-security@googlegroups.com, <other people involved in the report/fix> +Subject: [vs] Upcoming Git security fix release + +Team, + +The Git project will release new versions on <date> at 10am Pacific Time or +soon thereafter. I have attached a Git bundle (embedded in a `.tar.xz` to avoid +it being dropped) which you can fetch into a clone of +https://github.com/git/git via `git fetch --tags /path/to/cve-xxx.bundle`, +containing the tags for versions <versions>. + +You can verify with `git tag -v <tag>` that the versions were signed by +the Git maintainer, using the same GPG key as e.g. v2.24.0. + +Please use these tags to prepare `git` packages for your various +distributions, using the appropriate tagged versions. The added test cases +help verify the correctness. + +The addressed issues are: + +<list of CVEs with a short description, typically copy/pasted from Git's +release notes, usually demo exploit(s), too> + +Credit for finding the vulnerability goes to <reporter>, credit for fixing +it goes to <developer>. + +Thanks, +<name> + +.... + +Example mail to oss-security@lists.openwall.com +----------------------------------------------- + +.... +To: oss-security@lists.openwall.com +Cc: git-security@googlegroups.com, <other people involved in the report/fix> +Subject: git: <copy from security advisory> + +Team, + +The Git project released new versions on <date>, addressing <CVE>. + +All supported platforms are affected in one way or another, and all Git +versions all the way back to <version> are affected. The fixed versions are: +<versions>. + +Link to the announcement: <link to lore.kernel.org/git> + +We highly recommend to upgrade. + +The addressed issues are: +* <list of CVEs and their explanations, along with demo exploits> + +Credit for finding the vulnerability goes to <reporter>, credit for fixing +it goes to <developer>. + +Thanks, +<name> +.... diff --git a/Documentation/i18n.txt b/Documentation/i18n.txt index 7e36e5b55b..6c6baeeeb7 100644 --- a/Documentation/i18n.txt +++ b/Documentation/i18n.txt @@ -38,7 +38,7 @@ mind. a warning if the commit log message given to it does not look like a valid UTF-8 string, unless you explicitly say your project uses a legacy encoding. The way to say this is to - have i18n.commitencoding in `.git/config` file, like this: + have `i18n.commitEncoding` in `.git/config` file, like this: + ------------ [i18n] diff --git a/Documentation/lint-gitlink.perl b/Documentation/lint-gitlink.perl index 476cc30b83..b22a367844 100755 --- a/Documentation/lint-gitlink.perl +++ b/Documentation/lint-gitlink.perl @@ -1,71 +1,67 @@ #!/usr/bin/perl -use File::Find; -use Getopt::Long; +use strict; +use warnings; -my $basedir = "."; -GetOptions("basedir=s" => \$basedir) - or die("Cannot parse command line arguments\n"); +# Parse arguments, a simple state machine for input like: +# +# howto/*.txt config/*.txt --section=1 git.txt git-add.txt [...] --to-lint git-add.txt a-file.txt [...] +my %TXT; +my %SECTION; +my $section; +my $lint_these = 0; +for my $arg (@ARGV) { + if (my ($sec) = $arg =~ /^--section=(\d+)$/s) { + $section = $sec; + next; + } -my $found_errors = 0; + my ($name) = $arg =~ /^(.*?)\.txt$/s; + unless (defined $section) { + $TXT{$name} = $arg; + next; + } -sub report { - my ($where, $what, $error) = @_; - print "$where: $error: $what\n"; - $found_errors = 1; + $SECTION{$name} = $section; } -sub grab_section { - my ($page) = @_; - open my $fh, "<", "$basedir/$page.txt"; - my $firstline = <$fh>; - chomp $firstline; - close $fh; - my ($section) = ($firstline =~ /.*\((\d)\)$/); - return $section; +my $exit_code = 0; +sub report { + my ($pos, $line, $target, $msg) = @_; + substr($line, $pos) = "' <-- HERE"; + $line =~ s/^\s+//; + print "$ARGV:$.: error: $target: $msg, shown with 'HERE' below:\n"; + print "$ARGV:$.:\t'$line\n"; + $exit_code = 1; } -sub lint { - my ($file) = @_; - open my $fh, "<", $file - or return; - while (<$fh>) { - my $where = "$file:$."; - while (s/linkgit:((.*?)\[(\d)\])//) { - my ($target, $page, $section) = ($1, $2, $3); +@ARGV = sort values %TXT; +die "BUG: Nothing to process!" unless @ARGV; +while (<>) { + my $line = $_; + while ($line =~ m/linkgit:((.*?)\[(\d)\])/g) { + my $pos = pos $line; + my ($target, $page, $section) = ($1, $2, $3); - # De-AsciiDoc - $page =~ s/{litdd}/--/g; + # De-AsciiDoc + $page =~ s/{litdd}/--/g; - if ($page !~ /^git/) { - report($where, $target, "nongit link"); - next; - } - if (! -f "$basedir/$page.txt") { - report($where, $target, "no such source"); - next; - } - $real_section = grab_section($page); - if ($real_section != $section) { - report($where, $target, - "wrong section (should be $real_section)"); - next; - } + if (!exists $TXT{$page}) { + report($pos, $line, $target, "link outside of our own docs"); + next; + } + if (!exists $SECTION{$page}) { + report($pos, $line, $target, "link outside of our sectioned docs"); + next; + } + my $real_section = $SECTION{$page}; + if ($section != $SECTION{$page}) { + report($pos, $line, $target, "wrong section (should be $real_section)"); + next; } } - close $fh; -} - -sub lint_it { - lint($File::Find::name) if -f && /\.txt$/; -} - -if (!@ARGV) { - find({ wanted => \&lint_it, no_chdir => 1 }, $basedir); -} else { - for (@ARGV) { - lint($_); - } + # this resets our $. for each file + close ARGV if eof; } -exit $found_errors; +exit $exit_code; diff --git a/Documentation/lint-man-end-blurb.perl b/Documentation/lint-man-end-blurb.perl new file mode 100755 index 0000000000..d69312e5db --- /dev/null +++ b/Documentation/lint-man-end-blurb.perl @@ -0,0 +1,24 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +my $exit_code = 0; +sub report { + my ($target, $msg) = @_; + print "error: $target: $msg\n"; + $exit_code = 1; +} + +local $/; +while (my $slurp = <>) { + report($ARGV, "has no 'Part of the linkgit:git[1] suite' end blurb") + unless $slurp =~ m[ + ^GIT\n + ---\n + \QPart of the linkgit:git[1] suite\E \n + \z + ]mx; +} + +exit $exit_code; diff --git a/Documentation/lint-man-section-order.perl b/Documentation/lint-man-section-order.perl new file mode 100755 index 0000000000..b05f9156dd --- /dev/null +++ b/Documentation/lint-man-section-order.perl @@ -0,0 +1,105 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +my %SECTIONS; +{ + my $order = 0; + %SECTIONS = ( + 'NAME' => { + required => 1, + order => $order++, + }, + 'SYNOPSIS' => { + required => 1, + order => $order++, + }, + 'DESCRIPTION' => { + required => 1, + order => $order++, + }, + 'OPTIONS' => { + order => $order++, + required => 0, + }, + 'CONFIGURATION' => { + order => $order++, + }, + 'BUGS' => { + order => $order++, + }, + 'SEE ALSO' => { + order => $order++, + }, + 'GIT' => { + required => 1, + order => $order++, + }, + ); +} +my $SECTION_RX = do { + my ($names) = join "|", keys %SECTIONS; + qr/^($names)$/s; +}; + +my $exit_code = 0; +sub report { + my ($msg) = @_; + print "$ARGV:$.: $msg\n"; + $exit_code = 1; +} + +my $last_was_section; +my @actual_order; +while (my $line = <>) { + chomp $line; + if ($line =~ $SECTION_RX) { + push @actual_order => $line; + $last_was_section = 1; + # Have no "last" section yet, processing NAME + next if @actual_order == 1; + + my @expected_order = sort { + $SECTIONS{$a}->{order} <=> $SECTIONS{$b}->{order} + } @actual_order; + + my $expected_last = $expected_order[-2]; + my $actual_last = $actual_order[-2]; + if ($actual_last ne $expected_last) { + report("section '$line' incorrectly ordered, comes after '$actual_last'"); + } + next; + } + if ($last_was_section) { + my $last_section = $actual_order[-1]; + if (length $last_section ne length $line) { + report("dashes under '$last_section' should match its length!"); + } + if ($line !~ /^-+$/) { + report("dashes under '$last_section' should be '-' dashes!"); + } + $last_was_section = 0; + } + + if (eof) { + # We have both a hash and an array to consider, for + # convenience + my %actual_sections; + @actual_sections{@actual_order} = (); + + for my $section (sort keys %SECTIONS) { + next if !$SECTIONS{$section}->{required} or exists $actual_sections{$section}; + report("has no required '$section' section!"); + } + + # Reset per-file state + { + @actual_order = (); + # this resets our $. for each file + close ARGV; + } + } +} + +exit $exit_code; diff --git a/Documentation/mailmap.txt b/Documentation/mailmap.txt deleted file mode 100644 index 4a8c276529..0000000000 --- a/Documentation/mailmap.txt +++ /dev/null @@ -1,75 +0,0 @@ -If the file `.mailmap` exists at the toplevel of the repository, or at -the location pointed to by the mailmap.file or mailmap.blob -configuration options, it -is used to map author and committer names and email addresses to -canonical real names and email addresses. - -In the simple form, each line in the file consists of the canonical -real name of an author, whitespace, and an email address used in the -commit (enclosed by '<' and '>') to map to the name. For example: --- - Proper Name <commit@email.xx> --- - -The more complex forms are: --- - <proper@email.xx> <commit@email.xx> --- -which allows mailmap to replace only the email part of a commit, and: --- - Proper Name <proper@email.xx> <commit@email.xx> --- -which allows mailmap to replace both the name and the email of a -commit matching the specified commit email address, and: --- - Proper Name <proper@email.xx> Commit Name <commit@email.xx> --- -which allows mailmap to replace both the name and the email of a -commit matching both the specified commit name and email address. - -Example 1: Your history contains commits by two authors, Jane -and Joe, whose names appear in the repository under several forms: - ------------- -Joe Developer <joe@example.com> -Joe R. Developer <joe@example.com> -Jane Doe <jane@example.com> -Jane Doe <jane@laptop.(none)> -Jane D. <jane@desktop.(none)> ------------- - -Now suppose that Joe wants his middle name initial used, and Jane -prefers her family name fully spelled out. A proper `.mailmap` file -would look like: - ------------- -Jane Doe <jane@desktop.(none)> -Joe R. Developer <joe@example.com> ------------- - -Note how there is no need for an entry for `<jane@laptop.(none)>`, because the -real name of that author is already correct. - -Example 2: Your repository contains commits from the following -authors: - ------------- -nick1 <bugs@company.xx> -nick2 <bugs@company.xx> -nick2 <nick2@company.xx> -santa <me@company.xx> -claus <me@company.xx> -CTO <cto@coompany.xx> ------------- - -Then you might want a `.mailmap` file that looks like: ------------- -<cto@company.xx> <cto@coompany.xx> -Some Dude <some@dude.xx> nick1 <bugs@company.xx> -Other Author <other@author.xx> nick2 <bugs@company.xx> -Other Author <other@author.xx> <nick2@company.xx> -Santa Claus <santa.claus@northpole.xx> <me@company.xx> ------------- - -Use hash '#' for comments that are either on their own line, or after -the email address. diff --git a/Documentation/merge-options.txt b/Documentation/merge-options.txt index eb0aabd396..61ec157c2f 100644 --- a/Documentation/merge-options.txt +++ b/Documentation/merge-options.txt @@ -2,6 +2,9 @@ --no-commit:: Perform the merge and commit the result. This option can be used to override --no-commit. +ifdef::git-pull[] + Only useful when merging. +endif::git-pull[] + With --no-commit perform the merge and stop just before creating a merge commit, to give the user a chance to inspect and further @@ -39,6 +42,7 @@ set to `no` at the beginning of them. to `MERGE_MSG` before being passed on to the commit machinery in the case of a merge conflict. +ifdef::git-merge[] --ff:: --no-ff:: --ff-only:: @@ -47,6 +51,22 @@ set to `no` at the beginning of them. default unless merging an annotated (and possibly signed) tag that is not stored in its natural place in the `refs/tags/` hierarchy, in which case `--no-ff` is assumed. +endif::git-merge[] +ifdef::git-pull[] +--ff-only:: + Only update to the new history if there is no divergent local + history. This is the default when no method for reconciling + divergent histories is provided (via the --rebase=* flags). + +--ff:: +--no-ff:: + When merging rather than rebasing, specifies how a merge is + handled when the merged-in history is already a descendant of + the current history. If merging is requested, `--ff` is the + default unless merging an annotated (and possibly signed) tag + that is not stored in its natural place in the `refs/tags/` + hierarchy, in which case `--no-ff` is assumed. +endif::git-pull[] + With `--ff`, when possible resolve the merge as a fast-forward (only update the branch pointer to match the merged branch; do not create a @@ -55,9 +75,11 @@ descendant of the current history), create a merge commit. + With `--no-ff`, create a merge commit in all cases, even when the merge could instead be resolved as a fast-forward. +ifdef::git-merge[] + With `--ff-only`, resolve the merge as a fast-forward when possible. When not possible, refuse to merge and exit with a non-zero status. +endif::git-merge[] -S[<keyid>]:: --gpg-sign[=<keyid>]:: @@ -73,6 +95,9 @@ When not possible, refuse to merge and exit with a non-zero status. In addition to branch names, populate the log message with one-line descriptions from at most <n> actual commits that are being merged. See also linkgit:git-fmt-merge-msg[1]. +ifdef::git-pull[] + Only useful when merging. +endif::git-pull[] + With --no-log do not list one-line descriptions from the actual commits being merged. @@ -102,18 +127,25 @@ With --no-squash perform the merge and commit the result. This option can be used to override --squash. + With --squash, --commit is not allowed, and will fail. +ifdef::git-pull[] ++ +Only useful when merging. +endif::git-pull[] --no-verify:: This option bypasses the pre-merge and commit-msg hooks. See also linkgit:githooks[5]. +ifdef::git-pull[] + Only useful when merging. +endif::git-pull[] -s <strategy>:: --strategy=<strategy>:: Use the given merge strategy; can be supplied more than once to specify them in the order they should be tried. If there is no `-s` option, a built-in list of strategies - is used instead ('git merge-recursive' when merging a single - head, 'git merge-octopus' otherwise). + is used instead (`ort` when merging a single head, + `octopus` otherwise). -X <option>:: --strategy-option=<option>:: @@ -127,6 +159,10 @@ With --squash, --commit is not allowed, and will fail. default trust model, this means the signing key has been signed by a trusted key. If the tip commit of the side branch is not signed with a valid key, the merge is aborted. +ifdef::git-pull[] ++ +Only useful when merging. +endif::git-pull[] --summary:: --no-summary:: @@ -154,7 +190,8 @@ endif::git-pull[] --autostash:: --no-autostash:: Automatically create a temporary stash entry before the operation - begins, and apply it after the operation ends. This means + begins, record it in the special ref `MERGE_AUTOSTASH` + and apply it after the operation ends. This means that you can run the operation on a dirty worktree. However, use with care: the final stash application after a successful merge might result in non-trivial conflicts. @@ -166,3 +203,7 @@ endif::git-pull[] projects that started their lives independently. As that is a very rare occasion, no configuration variable to enable this by default exists and will not be added. +ifdef::git-pull[] ++ +Only useful when merging. +endif::git-pull[] diff --git a/Documentation/merge-strategies.txt b/Documentation/merge-strategies.txt index 2912de706b..5fc54ec060 100644 --- a/Documentation/merge-strategies.txt +++ b/Documentation/merge-strategies.txt @@ -6,28 +6,23 @@ backend 'merge strategies' to be chosen with `-s` option. Some strategies can also take their own options, which can be passed by giving `-X<option>` arguments to `git merge` and/or `git pull`. -resolve:: - This can only resolve two heads (i.e. the current branch - and another branch you pulled from) using a 3-way merge - algorithm. It tries to carefully detect criss-cross - merge ambiguities and is considered generally safe and - fast. - -recursive:: - This can only resolve two heads using a 3-way merge - algorithm. When there is more than one common - ancestor that can be used for 3-way merge, it creates a - merged tree of the common ancestors and uses that as - the reference tree for the 3-way merge. This has been - reported to result in fewer merge conflicts without - causing mismerges by tests done on actual merge commits - taken from Linux 2.6 kernel development history. - Additionally this can detect and handle merges involving - renames, but currently cannot make use of detected - copies. This is the default merge strategy when pulling - or merging one branch. +ort:: + This is the default merge strategy when pulling or merging one + branch. This strategy can only resolve two heads using a + 3-way merge algorithm. When there is more than one common + ancestor that can be used for 3-way merge, it creates a merged + tree of the common ancestors and uses that as the reference + tree for the 3-way merge. This has been reported to result in + fewer merge conflicts without causing mismerges by tests done + on actual merge commits taken from Linux 2.6 kernel + development history. Additionally this strategy can detect + and handle merges involving renames. It does not make use of + detected copies. The name for this algorithm is an acronym + ("Ostensibly Recursive's Twin") and came from the fact that it + was written as a replacement for the previous default + algorithm, `recursive`. + -The 'recursive' strategy can take the following options: +The 'ort' strategy can take the following options: ours;; This option forces conflicting hunks to be auto-resolved cleanly by @@ -43,19 +38,6 @@ theirs;; This is the opposite of 'ours'; note that, unlike 'ours', there is no 'theirs' merge strategy to confuse this merge option with. -patience;; - With this option, 'merge-recursive' spends a little extra time - to avoid mismerges that sometimes occur due to unimportant - matching lines (e.g., braces from distinct functions). Use - this when the branches to be merged have diverged wildly. - See also linkgit:git-diff[1] `--patience`. - -diff-algorithm=[patience|minimal|histogram|myers];; - Tells 'merge-recursive' to use a different diff algorithm, which - can help avoid mismerges that occur due to unimportant matching - lines (such as braces from distinct functions). See also - linkgit:git-diff[1] `--diff-algorithm`. - ignore-space-change;; ignore-all-space;; ignore-space-at-eol;; @@ -84,11 +66,6 @@ no-renormalize;; Disables the `renormalize` option. This overrides the `merge.renormalize` configuration variable. -no-renames;; - Turn off rename detection. This overrides the `merge.renames` - configuration variable. - See also linkgit:git-diff[1] `--no-renames`. - find-renames[=<n>];; Turn on rename detection, optionally setting the similarity threshold. This is the default. This overrides the @@ -105,6 +82,46 @@ subtree[=<path>];; is prefixed (or stripped from the beginning) to make the shape of two trees to match. +recursive:: + This can only resolve two heads using a 3-way merge + algorithm. When there is more than one common + ancestor that can be used for 3-way merge, it creates a + merged tree of the common ancestors and uses that as + the reference tree for the 3-way merge. This has been + reported to result in fewer merge conflicts without + causing mismerges by tests done on actual merge commits + taken from Linux 2.6 kernel development history. + Additionally this can detect and handle merges involving + renames. It does not make use of detected copies. This was + the default strategy for resolving two heads from Git v0.99.9k + until v2.33.0. ++ +The 'recursive' strategy takes the same options as 'ort'. However, +there are three additional options that 'ort' ignores (not documented +above) that are potentially useful with the 'recursive' strategy: + +patience;; + Deprecated synonym for `diff-algorithm=patience`. + +diff-algorithm=[patience|minimal|histogram|myers];; + Use a different diff algorithm while merging, which can help + avoid mismerges that occur due to unimportant matching lines + (such as braces from distinct functions). See also + linkgit:git-diff[1] `--diff-algorithm`. Note that `ort` + specifically uses `diff-algorithm=histogram`, while `recursive` + defaults to the `diff.algorithm` config setting. + +no-renames;; + Turn off rename detection. This overrides the `merge.renames` + configuration variable. + See also linkgit:git-diff[1] `--no-renames`. + +resolve:: + This can only resolve two heads (i.e. the current branch + and another branch you pulled from) using a 3-way merge + algorithm. It tries to carefully detect criss-cross + merge ambiguities. It does not handle renames. + octopus:: This resolves cases with more than two heads, but refuses to do a complex merge that needs manual resolution. It is @@ -121,13 +138,13 @@ ours:: the 'recursive' merge strategy. subtree:: - This is a modified recursive strategy. When merging trees A and + This is a modified `ort` strategy. When merging trees A and B, if B corresponds to a subtree of A, B is first adjusted to match the tree structure of A, instead of reading the trees at the same level. This adjustment is also done to the common ancestor tree. -With the strategies that use 3-way merge (including the default, 'recursive'), +With the strategies that use 3-way merge (including the default, 'ort'), if a change is made on both branches, but later reverted on one of the branches, that change will be present in the merged result; some people find this behavior confusing. It occurs because only the heads and the merge base diff --git a/Documentation/pretty-formats.txt b/Documentation/pretty-formats.txt index 84bbc7439a..ef6bd420ae 100644 --- a/Documentation/pretty-formats.txt +++ b/Documentation/pretty-formats.txt @@ -190,6 +190,8 @@ The placeholders are: '%ai':: author date, ISO 8601-like format '%aI':: author date, strict ISO 8601 format '%as':: author date, short format (`YYYY-MM-DD`) +'%ah':: author date, human style (like the `--date=human` option of + linkgit:git-rev-list[1]) '%cn':: committer name '%cN':: committer name (respecting .mailmap, see linkgit:git-shortlog[1] or linkgit:git-blame[1]) @@ -206,8 +208,23 @@ The placeholders are: '%ci':: committer date, ISO 8601-like format '%cI':: committer date, strict ISO 8601 format '%cs':: committer date, short format (`YYYY-MM-DD`) +'%ch':: committer date, human style (like the `--date=human` option of + linkgit:git-rev-list[1]) '%d':: ref names, like the --decorate option of linkgit:git-log[1] '%D':: ref names without the " (", ")" wrapping. +'%(describe[:options])':: human-readable name, like + linkgit:git-describe[1]; empty string for + undescribable commits. The `describe` string + may be followed by a colon and zero or more + comma-separated options. Descriptions can be + inconsistent when tags are added or removed at + the same time. ++ +** 'match=<pattern>': Only consider tags matching the given + `glob(7)` pattern, excluding the "refs/tags/" prefix. +** 'exclude=<pattern>': Do not consider tags matching the given + `glob(7)` pattern, excluding the "refs/tags/" prefix. + '%S':: ref name given on the command line by which the commit was reached (like `git log --source`), only works with `git log` '%e':: encoding @@ -252,7 +269,15 @@ endif::git-rev-list[] interpreted by linkgit:git-interpret-trailers[1]. The `trailers` string may be followed by a colon - and zero or more comma-separated options: + and zero or more comma-separated options. + If any option is provided multiple times the + last occurrence wins. ++ +The boolean options accept an optional value `[=<BOOL>]`. The values +`true`, `false`, `on`, `off` etc. are all accepted. See the "boolean" +sub-section in "EXAMPLES" in linkgit:git-config[1]. If a boolean +option is given with no value, it's enabled. ++ ** 'key=<K>': only show trailers with specified key. Matching is done case-insensitively and trailing colon is optional. If option is given multiple times trailer lines matching any of the keys are @@ -261,27 +286,25 @@ endif::git-rev-list[] desired it can be disabled with `only=false`. E.g., `%(trailers:key=Reviewed-by)` shows trailer lines with key `Reviewed-by`. -** 'only[=val]': select whether non-trailer lines from the trailer - block should be included. The `only` keyword may optionally be - followed by an equal sign and one of `true`, `on`, `yes` to omit or - `false`, `off`, `no` to show the non-trailer lines. If option is - given without value it is enabled. If given multiple times the last - value is used. +** 'only[=<BOOL>]': select whether non-trailer lines from the trailer + block should be included. ** 'separator=<SEP>': specify a separator inserted between trailer lines. When this option is not given each trailer line is terminated with a line feed character. The string SEP may contain the literal formatting codes described above. To use comma as separator one must use `%x2C` as it would otherwise be parsed as - next option. If separator option is given multiple times only the - last one is used. E.g., `%(trailers:key=Ticket,separator=%x2C )` + next option. E.g., `%(trailers:key=Ticket,separator=%x2C )` shows all trailer lines whose key is "Ticket" separated by a comma and a space. -** 'unfold[=val]': make it behave as if interpret-trailer's `--unfold` - option was given. In same way as to for `only` it can be followed - by an equal sign and explicit value. E.g., +** 'unfold[=<BOOL>]': make it behave as if interpret-trailer's `--unfold` + option was given. E.g., `%(trailers:only,unfold=true)` unfolds and shows all trailer lines. -** 'valueonly[=val]': skip over the key part of the trailer line and only - show the value part. Also this optionally allows explicit value. +** 'keyonly[=<BOOL>]': only show the key part of the trailer. +** 'valueonly[=<BOOL>]': only show the value part of the trailer. +** 'key_value_separator=<SEP>': specify a separator inserted between + trailer lines. When this option is not given each trailer key-value + pair is separated by ": ". Otherwise it shares the same semantics + as 'separator=<SEP>' above. NOTE: Some placeholders may depend on other options given to the revision traversal engine. For example, the `%g*` reflog options will diff --git a/Documentation/pretty-options.txt b/Documentation/pretty-options.txt index 27ddaf84a1..b3af850608 100644 --- a/Documentation/pretty-options.txt +++ b/Documentation/pretty-options.txt @@ -33,14 +33,16 @@ people using 80-column terminals. used together. --encoding=<encoding>:: - The commit objects record the encoding used for the log message + Commit objects record the character encoding used for the log message in their encoding header; this option can be used to tell the command to re-code the commit log message in the encoding preferred by the user. For non plumbing commands this defaults to UTF-8. Note that if an object claims to be encoded in `X` and we are outputting in `X`, we will output the object verbatim; this means that invalid sequences in the original - commit may be copied to the output. + commit may be copied to the output. Likewise, if iconv(3) fails + to convert the commit, we will output the original object + verbatim, along with a warning. --expand-tabs=<n>:: --expand-tabs:: diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 002379056a..b7bd27e171 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -129,6 +129,11 @@ parents) and `--max-parents=-1` (negative numbers denote no upper limit). adjusting to updated upstream from time to time, and this option allows you to ignore the individual commits brought in to your history by such a merge. +ifdef::git-log[] ++ +This option also changes default diff format for merge commits +to `first-parent`, see `--diff-merges=first-parent` for details. +endif::git-log[] --not:: Reverses the meaning of the '{caret}' prefix (or lack thereof) @@ -222,6 +227,15 @@ ifdef::git-rev-list[] test the exit status to see if a range of objects is fully connected (or not). It is faster than redirecting stdout to `/dev/null` as the output does not have to be formatted. + +--disk-usage:: + Suppress normal output; instead, print the sum of the bytes used + for on-disk storage by the selected commits or objects. This is + equivalent to piping the output into `git cat-file + --batch-check='%(objectsize:disk)'`, except that it runs much + faster (especially with `--use-bitmap-index`). See the `CAVEATS` + section in linkgit:git-cat-file[1] for the limitations of what + "on-disk storage" means. endif::git-rev-list[] --cherry-mark:: @@ -878,9 +892,12 @@ or units. n may be zero. The suffixes k, m, and g can be used to name units in KiB, MiB, or GiB. For example, 'blob:limit=1k' is the same as 'blob:limit=1024'. + +The form '--filter=object:type=(tag|commit|tree|blob)' omits all objects +which are not of the requested type. ++ The form '--filter=sparse:oid=<blob-ish>' uses a sparse-checkout specification contained in the blob (or blob-expression) '<blob-ish>' -to omit blobs that would not be not required for a sparse checkout on +to omit blobs that would not be required for a sparse checkout on the requested refs. + The form '--filter=tree:<depth>' omits all blobs and trees whose depth @@ -916,6 +933,11 @@ equivalent. --no-filter:: Turn off any previous `--filter=` argument. +--filter-provided-objects:: + Filter the list of explicitly provided objects, which would otherwise + always be printed even if they did not match any of the filters. Only + useful with `--filter=`. + --filter-print-omitted:: Only useful with `--filter=`; prints a list of the objects omitted by the filter. Object IDs are prefixed with a ``~'' character. @@ -946,6 +968,11 @@ list of the missing objects. Object IDs are prefixed with a ``?'' character. objects. endif::git-rev-list[] +--unsorted-input:: + Show commits in the order they were given on the command line instead + of sorting them in reverse chronological order by commit time. Cannot + be combined with `--no-walk` or `--no-walk=sorted`. + --no-walk[=(sorted|unsorted)]:: Only show the given commits, but do not traverse their ancestors. This has no effect if a range is specified. If the argument @@ -953,7 +980,8 @@ endif::git-rev-list[] given on the command line. Otherwise (if `sorted` or no argument was given), the commits are shown in reverse chronological order by commit time. - Cannot be combined with `--graph`. + Cannot be combined with `--graph`. Cannot be combined with + `--unsorted-input` if `sorted` or no argument was given. --do-walk:: Overrides a previous `--no-walk`. @@ -1042,6 +1070,14 @@ ifdef::git-rev-list[] --header:: Print the contents of the commit in raw-format; each record is separated with a NUL character. + +--no-commit-header:: + Suppress the header line containing "commit" and the object ID printed before + the specified format. This has no effect on the built-in formats; only custom + formats are affected. + +--commit-header:: + Overrides a previous `--no-commit-header`. endif::git-rev-list[] --parents:: diff --git a/Documentation/revisions.txt b/Documentation/revisions.txt index d9169c062e..f5f17b65a1 100644 --- a/Documentation/revisions.txt +++ b/Documentation/revisions.txt @@ -260,6 +260,9 @@ any of the given commits. A commit's reachable set is the commit itself and the commits in its ancestry chain. +There are several notations to specify a set of connected commits +(called a "revision range"), illustrated below. + Commit Exclusions ~~~~~~~~~~~~~~~~~ @@ -294,6 +297,26 @@ is a shorthand for 'HEAD..origin' and asks "What did the origin do since I forked from them?" Note that '..' would mean 'HEAD..HEAD' which is an empty range that is both reachable and unreachable from HEAD. +Commands that are specifically designed to take two distinct ranges +(e.g. "git range-diff R1 R2" to compare two ranges) do exist, but +they are exceptions. Unless otherwise noted, all "git" commands +that operate on a set of commits work on a single revision range. +In other words, writing two "two-dot range notation" next to each +other, e.g. + + $ git log A..B C..D + +does *not* specify two revision ranges for most commands. Instead +it will name a single connected set of commits, i.e. those that are +reachable from either B or D but are reachable from neither A or C. +In a linear history like this: + + ---A---B---o---o---C---D + +because A and B are reachable from C, the revision range specified +by these two dotted ranges is a single commit D. + + Other <rev>{caret} Parent Shorthand Notations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Three other shorthands exist, particularly useful for merge commits, diff --git a/Documentation/technical/api-error-handling.txt b/Documentation/technical/api-error-handling.txt index ceeedd485c..8be4f4d0d6 100644 --- a/Documentation/technical/api-error-handling.txt +++ b/Documentation/technical/api-error-handling.txt @@ -1,8 +1,11 @@ Error reporting in git ====================== -`die`, `usage`, `error`, and `warning` report errors of various -kinds. +`BUG`, `die`, `usage`, `error`, and `warning` report errors of +various kinds. + +- `BUG` is for failed internal assertions that should never happen, + i.e. a bug in git itself. - `die` is for fatal application errors. It prints a message to the user and exits with status 128. @@ -20,6 +23,9 @@ kinds. without running into too many problems. Like `error`, it returns -1 after reporting the situation to the caller. +These reports will be logged via the trace2 facility. See the "error" +event in link:api-trace2.txt[trace2 API]. + Customizable error handlers --------------------------- diff --git a/Documentation/technical/api-parse-options.txt b/Documentation/technical/api-parse-options.txt index 5a60bbfa7f..acfd5dc1d8 100644 --- a/Documentation/technical/api-parse-options.txt +++ b/Documentation/technical/api-parse-options.txt @@ -198,11 +198,6 @@ There are some macros to easily define options: The filename will be prefixed by passing the filename along with the prefix argument of `parse_options()` to `prefix_filename()`. -`OPT_ARGUMENT(long, &int_var, description)`:: - Introduce a long-option argument that will be kept in `argv[]`. - If this option was seen, `int_var` will be set to one (except - if a `NULL` pointer was passed). - `OPT_NUMBER_CALLBACK(&var, description, func_ptr)`:: Recognize numerical options like -123 and feed the integer as if it was an argument to the function given by `func_ptr`. diff --git a/Documentation/technical/api-simple-ipc.txt b/Documentation/technical/api-simple-ipc.txt new file mode 100644 index 0000000000..d79ad323e6 --- /dev/null +++ b/Documentation/technical/api-simple-ipc.txt @@ -0,0 +1,105 @@ +Simple-IPC API +============== + +The Simple-IPC API is a collection of `ipc_` prefixed library routines +and a basic communication protocol that allow an IPC-client process to +send an application-specific IPC-request message to an IPC-server +process and receive an application-specific IPC-response message. + +Communication occurs over a named pipe on Windows and a Unix domain +socket on other platforms. IPC-clients and IPC-servers rendezvous at +a previously agreed-to application-specific pathname (which is outside +the scope of this design) that is local to the computer system. + +The IPC-server routines within the server application process create a +thread pool to listen for connections and receive request messages +from multiple concurrent IPC-clients. When received, these messages +are dispatched up to the server application callbacks for handling. +IPC-server routines then incrementally relay responses back to the +IPC-client. + +The IPC-client routines within a client application process connect +to the IPC-server and send a request message and wait for a response. +When received, the response is returned back the caller. + +For example, the `fsmonitor--daemon` feature will be built as a server +application on top of the IPC-server library routines. It will have +threads watching for file system events and a thread pool waiting for +client connections. Clients, such as `git status` will request a list +of file system events since a point in time and the server will +respond with a list of changed files and directories. The formats of +the request and response are application-specific; the IPC-client and +IPC-server routines treat them as opaque byte streams. + + +Comparison with sub-process model +--------------------------------- + +The Simple-IPC mechanism differs from the existing `sub-process.c` +model (Documentation/technical/long-running-process-protocol.txt) and +used by applications like Git-LFS. In the LFS-style sub-process model +the helper is started by the foreground process, communication happens +via a pair of file descriptors bound to the stdin/stdout of the +sub-process, the sub-process only serves the current foreground +process, and the sub-process exits when the foreground process +terminates. + +In the Simple-IPC model the server is a very long-running service. It +can service many clients at the same time and has a private socket or +named pipe connection to each active client. It might be started +(on-demand) by the current client process or it might have been +started by a previous client or by the OS at boot time. The server +process is not associated with a terminal and it persists after +clients terminate. Clients do not have access to the stdin/stdout of +the server process and therefore must communicate over sockets or +named pipes. + + +Server startup and shutdown +--------------------------- + +How an application server based upon IPC-server is started is also +outside the scope of the Simple-IPC design and is a property of the +application using it. For example, the server might be started or +restarted during routine maintenance operations, or it might be +started as a system service during the system boot-up sequence, or it +might be started on-demand by a foreground Git command when needed. + +Similarly, server shutdown is a property of the application using +the simple-ipc routines. For example, the server might decide to +shutdown when idle or only upon explicit request. + + +Simple-IPC protocol +------------------- + +The Simple-IPC protocol consists of a single request message from the +client and an optional response message from the server. Both the +client and server messages are unlimited in length and are terminated +with a flush packet. + +The pkt-line routines (Documentation/technical/protocol-common.txt) +are used to simplify buffer management during message generation, +transmission, and reception. A flush packet is used to mark the end +of the message. This allows the sender to incrementally generate and +transmit the message. It allows the receiver to incrementally receive +the message in chunks and to know when they have received the entire +message. + +The actual byte format of the client request and server response +messages are application specific. The IPC layer transmits and +receives them as opaque byte buffers without any concern for the +content within. It is the job of the calling application layer to +understand the contents of the request and response messages. + + +Summary +------- + +Conceptually, the Simple-IPC protocol is similar to an HTTP REST +request. Clients connect, make an application-specific and +stateless request, receive an application-specific +response, and disconnect. It is a one round trip facility for +querying the server. The Simple-IPC routines hide the socket, +named pipe, and thread pool details and allow the application +layer to focus on the application at hand. diff --git a/Documentation/technical/api-trace2.txt b/Documentation/technical/api-trace2.txt index c65ffafc48..b9f3198fbe 100644 --- a/Documentation/technical/api-trace2.txt +++ b/Documentation/technical/api-trace2.txt @@ -396,14 +396,14 @@ only present on the "start" and "atexit" events. } ------------ -`"discard"`:: +`"too_many_files"`:: This event is written to the git-trace2-discard sentinel file if there are too many files in the target trace directory (see the trace2.maxFiles config option). + ------------ { - "event":"discard", + "event":"too_many_files", ... } ------------ @@ -465,7 +465,7 @@ completed.) ------------ `"error"`:: - This event is emitted when one of the `error()`, `die()`, + This event is emitted when one of the `BUG()`, `error()`, `die()`, `warning()`, or `usage()` functions are called. + ------------ @@ -493,6 +493,20 @@ about specific error arguments. } ------------ +`"cmd_ancestry"`:: + This event contains the text command name for the parent (and earlier + generations of parents) of the current process, in an array ordered from + nearest parent to furthest great-grandparent. It may not be implemented + on all platforms. ++ +------------ +{ + "event":"cmd_ancestry", + ... + "ancestry":["bash","tmux: server","systemd"] +} +------------ + `"cmd_name"`:: This event contains the command name for this git process and the hierarchy of commands from parent git processes. diff --git a/Documentation/technical/bitmap-format.txt b/Documentation/technical/bitmap-format.txt index f8c18a0f7a..04b3ec2178 100644 --- a/Documentation/technical/bitmap-format.txt +++ b/Documentation/technical/bitmap-format.txt @@ -1,6 +1,44 @@ GIT bitmap v1 format ==================== +== Pack and multi-pack bitmaps + +Bitmaps store reachability information about the set of objects in a packfile, +or a multi-pack index (MIDX). The former is defined obviously, and the latter is +defined as the union of objects in packs contained in the MIDX. + +A bitmap may belong to either one pack, or the repository's multi-pack index (if +it exists). A repository may have at most one bitmap. + +An object is uniquely described by its bit position within a bitmap: + + - If the bitmap belongs to a packfile, the __n__th bit corresponds to + the __n__th object in pack order. For a function `offset` which maps + objects to their byte offset within a pack, pack order is defined as + follows: + + o1 <= o2 <==> offset(o1) <= offset(o2) + + - If the bitmap belongs to a MIDX, the __n__th bit corresponds to the + __n__th object in MIDX order. With an additional function `pack` which + maps objects to the pack they were selected from by the MIDX, MIDX order + is defined as follows: + + o1 <= o2 <==> pack(o1) <= pack(o2) /\ offset(o1) <= offset(o2) + + The ordering between packs is done according to the MIDX's .rev file. + Notably, the preferred pack sorts ahead of all other packs. + +The on-disk representation (described below) of a bitmap is the same regardless +of whether or not that bitmap belongs to a packfile or a MIDX. The only +difference is the interpretation of the bits, which is described above. + +Certain bitmap extensions are supported (see: Appendix B). No extensions are +required for bitmaps corresponding to packfiles. For bitmaps that correspond to +MIDXs, both the bit-cache and rev-cache extensions are required. + +== On-disk format + - A header appears at the beginning: 4-byte signature: {'B', 'I', 'T', 'M'} @@ -14,17 +52,19 @@ GIT bitmap v1 format The following flags are supported: - BITMAP_OPT_FULL_DAG (0x1) REQUIRED - This flag must always be present. It implies that the bitmap - index has been generated for a packfile with full closure - (i.e. where every single object in the packfile can find - its parent links inside the same packfile). This is a - requirement for the bitmap index format, also present in JGit, - that greatly reduces the complexity of the implementation. + This flag must always be present. It implies that the + bitmap index has been generated for a packfile or + multi-pack index (MIDX) with full closure (i.e. where + every single object in the packfile/MIDX can find its + parent links inside the same packfile/MIDX). This is a + requirement for the bitmap index format, also present in + JGit, that greatly reduces the complexity of the + implementation. - BITMAP_OPT_HASH_CACHE (0x4) If present, the end of the bitmap file contains `N` 32-bit name-hash values, one per object in the - pack. The format and meaning of the name-hash is + pack/MIDX. The format and meaning of the name-hash is described below. 4-byte entry count (network byte order) @@ -33,7 +73,8 @@ GIT bitmap v1 format 20-byte checksum - The SHA1 checksum of the pack this bitmap index belongs to. + The SHA1 checksum of the pack/MIDX this bitmap index + belongs to. - 4 EWAH bitmaps that act as type indexes @@ -50,7 +91,7 @@ GIT bitmap v1 format - Tags In each bitmap, the `n`th bit is set to true if the `n`th object - in the packfile is of that type. + in the packfile or multi-pack index is of that type. The obvious consequence is that the OR of all 4 bitmaps will result in a full set (all bits set), and the AND of all 4 bitmaps will @@ -62,8 +103,9 @@ GIT bitmap v1 format Each entry contains the following: - 4-byte object position (network byte order) - The position **in the index for the packfile** where the - bitmap for this commit is found. + The position **in the index for the packfile or + multi-pack index** where the bitmap for this commit is + found. - 1-byte XOR-offset The xor offset used to compress this bitmap. For an entry @@ -146,10 +188,11 @@ Name-hash cache --------------- If the BITMAP_OPT_HASH_CACHE flag is set, the end of the bitmap contains -a cache of 32-bit values, one per object in the pack. The value at +a cache of 32-bit values, one per object in the pack/MIDX. The value at position `i` is the hash of the pathname at which the `i`th object -(counting in index order) in the pack can be found. This can be fed -into the delta heuristics to compare objects with similar pathnames. +(counting in index or multi-pack index order) in the pack/MIDX can be found. +This can be fed into the delta heuristics to compare objects with similar +pathnames. The hash algorithm used is: diff --git a/Documentation/technical/chunk-format.txt b/Documentation/technical/chunk-format.txt new file mode 100644 index 0000000000..593614fced --- /dev/null +++ b/Documentation/technical/chunk-format.txt @@ -0,0 +1,116 @@ +Chunk-based file formats +======================== + +Some file formats in Git use a common concept of "chunks" to describe +sections of the file. This allows structured access to a large file by +scanning a small "table of contents" for the remaining data. This common +format is used by the `commit-graph` and `multi-pack-index` files. See +link:technical/pack-format.html[the `multi-pack-index` format] and +link:technical/commit-graph-format.html[the `commit-graph` format] for +how they use the chunks to describe structured data. + +A chunk-based file format begins with some header information custom to +that format. That header should include enough information to identify +the file type, format version, and number of chunks in the file. From this +information, that file can determine the start of the chunk-based region. + +The chunk-based region starts with a table of contents describing where +each chunk starts and ends. This consists of (C+1) rows of 12 bytes each, +where C is the number of chunks. Consider the following table: + + | Chunk ID (4 bytes) | Chunk Offset (8 bytes) | + |--------------------|------------------------| + | ID[0] | OFFSET[0] | + | ... | ... | + | ID[C] | OFFSET[C] | + | 0x0000 | OFFSET[C+1] | + +Each row consists of a 4-byte chunk identifier (ID) and an 8-byte offset. +Each integer is stored in network-byte order. + +The chunk identifier `ID[i]` is a label for the data stored within this +fill from `OFFSET[i]` (inclusive) to `OFFSET[i+1]` (exclusive). Thus, the +size of the `i`th chunk is equal to the difference between `OFFSET[i+1]` +and `OFFSET[i]`. This requires that the chunk data appears contiguously +in the same order as the table of contents. + +The final entry in the table of contents must be four zero bytes. This +confirms that the table of contents is ending and provides the offset for +the end of the chunk-based data. + +Note: The chunk-based format expects that the file contains _at least_ a +trailing hash after `OFFSET[C+1]`. + +Functions for working with chunk-based file formats are declared in +`chunk-format.h`. Using these methods provide extra checks that assist +developers when creating new file formats. + +Writing chunk-based file formats +-------------------------------- + +To write a chunk-based file format, create a `struct chunkfile` by +calling `init_chunkfile()` and pass a `struct hashfile` pointer. The +caller is responsible for opening the `hashfile` and writing header +information so the file format is identifiable before the chunk-based +format begins. + +Then, call `add_chunk()` for each chunk that is intended for write. This +populates the `chunkfile` with information about the order and size of +each chunk to write. Provide a `chunk_write_fn` function pointer to +perform the write of the chunk data upon request. + +Call `write_chunkfile()` to write the table of contents to the `hashfile` +followed by each of the chunks. This will verify that each chunk wrote +the expected amount of data so the table of contents is correct. + +Finally, call `free_chunkfile()` to clear the `struct chunkfile` data. The +caller is responsible for finalizing the `hashfile` by writing the trailing +hash and closing the file. + +Reading chunk-based file formats +-------------------------------- + +To read a chunk-based file format, the file must be opened as a +memory-mapped region. The chunk-format API expects that the entire file +is mapped as a contiguous memory region. + +Initialize a `struct chunkfile` pointer with `init_chunkfile(NULL)`. + +After reading the header information from the beginning of the file, +including the chunk count, call `read_table_of_contents()` to populate +the `struct chunkfile` with the list of chunks, their offsets, and their +sizes. + +Extract the data information for each chunk using `pair_chunk()` or +`read_chunk()`: + +* `pair_chunk()` assigns a given pointer with the location inside the + memory-mapped file corresponding to that chunk's offset. If the chunk + does not exist, then the pointer is not modified. + +* `read_chunk()` takes a `chunk_read_fn` function pointer and calls it + with the appropriate initial pointer and size information. The function + is not called if the chunk does not exist. Use this method to read chunks + if you need to perform immediate parsing or if you need to execute logic + based on the size of the chunk. + +After calling these methods, call `free_chunkfile()` to clear the +`struct chunkfile` data. This will not close the memory-mapped region. +Callers are expected to own that data for the timeframe the pointers into +the region are needed. + +Examples +-------- + +These file formats use the chunk-format API, and can be used as examples +for future formats: + +* *commit-graph:* see `write_commit_graph_file()` and `parse_commit_graph()` + in `commit-graph.c` for how the chunk-format API is used to write and + parse the commit-graph file format documented in + link:technical/commit-graph-format.html[the commit-graph file format]. + +* *multi-pack-index:* see `write_midx_internal()` and `load_multi_pack_index()` + in `midx.c` for how the chunk-format API is used to write and + parse the multi-pack-index file format documented in + link:technical/pack-format.html[the multi-pack-index file format]. diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index b3b58880b9..87971c27dd 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -4,11 +4,7 @@ Git commit graph format The Git commit graph stores a list of commit OIDs and some associated metadata, including: -- The generation number of the commit. Commits with no parents have - generation number 1; commits with parents have generation number - one more than the maximum generation number of its parents. We - reserve zero as special, and can be used to mark a generation - number invalid or as "not computed". +- The generation number of the commit. - The root tree OID. @@ -65,6 +61,9 @@ CHUNK LOOKUP: the length using the next chunk position if necessary.) Each chunk ID appears at most once. + The CHUNK LOOKUP matches the table of contents from + link:technical/chunk-format.html[the chunk-based file format]. + The remaining data in the body is described one chunk at a time, and these chunks may be given in any order. Chunks are required unless otherwise specified. @@ -86,13 +85,33 @@ CHUNK DATA: position. If there are more than two parents, the second value has its most-significant bit on and the other bits store an array position into the Extra Edge List chunk. - * The next 8 bytes store the generation number of the commit and + * The next 8 bytes store the topological level (generation number v1) + of the commit and the commit time in seconds since EPOCH. The generation number uses the higher 30 bits of the first 4 bytes, while the commit time uses the 32 bits of the second 4 bytes, along with the lowest 2 bits of the lowest byte, storing the 33rd and 34th bit of the commit time. + Generation Data (ID: {'G', 'D', 'A', 'T' }) (N * 4 bytes) [Optional] + * This list of 4-byte values store corrected commit date offsets for the + commits, arranged in the same order as commit data chunk. + * If the corrected commit date offset cannot be stored within 31 bits, + the value has its most-significant bit on and the other bits store + the position of corrected commit date into the Generation Data Overflow + chunk. + * Generation Data chunk is present only when commit-graph file is written + by compatible versions of Git and in case of split commit-graph chains, + the topmost layer also has Generation Data chunk. + + Generation Data Overflow (ID: {'G', 'D', 'O', 'V' }) [Optional] + * This list of 8-byte values stores the corrected commit date offsets + for commits with corrected commit date offsets that cannot be + stored within 31 bits. + * Generation Data Overflow chunk is present only when Generation Data + chunk is present and atleast one corrected commit date offset cannot + be stored within 31 bits. + Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] This list of 4-byte values store the second through nth parents for all octopus merges. The second parent value in the commit data stores diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index f14a7659aa..f05e7bda1a 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -38,14 +38,31 @@ A consumer may load the following info for a commit from the graph: Values 1-4 satisfy the requirements of parse_commit_gently(). -Define the "generation number" of a commit recursively as follows: +There are two definitions of generation number: +1. Corrected committer dates (generation number v2) +2. Topological levels (generation nummber v1) - * A commit with no parents (a root commit) has generation number one. +Define "corrected committer date" of a commit recursively as follows: - * A commit with at least one parent has generation number one more than - the largest generation number among its parents. + * A commit with no parents (a root commit) has corrected committer date + equal to its committer date. -Equivalently, the generation number of a commit A is one more than the + * A commit with at least one parent has corrected committer date equal to + the maximum of its commiter date and one more than the largest corrected + committer date among its parents. + + * As a special case, a root commit with timestamp zero has corrected commit + date of 1, to be able to distinguish it from GENERATION_NUMBER_ZERO + (that is, an uncomputed corrected commit date). + +Define the "topological level" of a commit recursively as follows: + + * A commit with no parents (a root commit) has topological level of one. + + * A commit with at least one parent has topological level one more than + the largest topological level among its parents. + +Equivalently, the topological level of a commit A is one more than the length of a longest path from A to a root commit. The recursive definition is easier to use for computation and observing the following property: @@ -60,6 +77,9 @@ is easier to use for computation and observing the following property: generation numbers, then we always expand the boundary commit with highest generation number and can easily detect the stopping condition. +The property applies to both versions of generation number, that is both +corrected committer dates and topological levels. + This property can be used to significantly reduce the time it takes to walk commits and determine topological relationships. Without generation numbers, the general heuristic is the following: @@ -67,7 +87,9 @@ numbers, the general heuristic is the following: If A and B are commits with commit time X and Y, respectively, and X < Y, then A _probably_ cannot reach B. -This heuristic is currently used whenever the computation is allowed to +In absence of corrected commit dates (for example, old versions of Git or +mixed generation graph chains), +this heuristic is currently used whenever the computation is allowed to violate topological relationships due to clock skew (such as "git log" with default order), but is not used when the topological order is required (such as merge base calculations, "git log --graph"). @@ -77,7 +99,7 @@ in the commit graph. We can treat these commits as having "infinite" generation number and walk until reaching commits with known generation number. -We use the macro GENERATION_NUMBER_INFINITY = 0xFFFFFFFF to mark commits not +We use the macro GENERATION_NUMBER_INFINITY to mark commits not in the commit-graph file. If a commit-graph file was written by a version of Git that did not compute generation numbers, then those commits will have generation number represented by the macro GENERATION_NUMBER_ZERO = 0. @@ -93,12 +115,12 @@ fully-computed generation numbers. Using strict inequality may result in walking a few extra commits, but the simplicity in dealing with commits with generation number *_INFINITY or *_ZERO is valuable. -We use the macro GENERATION_NUMBER_MAX = 0x3FFFFFFF to for commits whose -generation numbers are computed to be at least this value. We limit at -this value since it is the largest value that can be stored in the -commit-graph file using the 30 bits available to generation numbers. This -presents another case where a commit can have generation number equal to -that of a parent. +We use the macro GENERATION_NUMBER_V1_MAX = 0x3FFFFFFF for commits whose +topological levels (generation number v1) are computed to be at least +this value. We limit at this value since it is the largest value that +can be stored in the commit-graph file using the 30 bits available +to topological levels. This presents another case where a commit can +have generation number equal to that of a parent. Design Details -------------- @@ -267,6 +289,35 @@ The merge strategy values (2 for the size multiple, 64,000 for the maximum number of commits) could be extracted into config settings for full flexibility. +## Handling Mixed Generation Number Chains + +With the introduction of generation number v2 and generation data chunk, the +following scenario is possible: + +1. "New" Git writes a commit-graph with the corrected commit dates. +2. "Old" Git writes a split commit-graph on top without corrected commit dates. + +A naive approach of using the newest available generation number from +each layer would lead to violated expectations: the lower layer would +use corrected commit dates which are much larger than the topological +levels of the higher layer. For this reason, Git inspects the topmost +layer to see if the layer is missing corrected commit dates. In such a case +Git only uses topological level for generation numbers. + +When writing a new layer in split commit-graph, we write corrected commit +dates if the topmost layer has corrected commit dates written. This +guarantees that if a layer has corrected commit dates, all lower layers +must have corrected commit dates as well. + +When merging layers, we do not consider whether the merged layers had corrected +commit dates. Instead, the new layer will have corrected commit dates if the +layer below the new layer has corrected commit dates. + +While writing or merging layers, if the new layer is the only layer, it will +have corrected commit dates when written by compatible versions of Git. Thus, +rewriting split commit-graph as a single file (`--split=replace`) creates a +single layer with corrected commit dates. + ## Deleting graph-{hash} files After a new tip file is written, some `graph-{hash}` files may no longer diff --git a/Documentation/technical/directory-rename-detection.txt b/Documentation/technical/directory-rename-detection.txt index 49b83ef3cc..029ee2cedc 100644 --- a/Documentation/technical/directory-rename-detection.txt +++ b/Documentation/technical/directory-rename-detection.txt @@ -2,9 +2,9 @@ Directory rename detection ========================== Rename detection logic in diffcore-rename that checks for renames of -individual files is aggregated and analyzed in merge-recursive for cases -where combinations of renames indicate that a full directory has been -renamed. +individual files is also aggregated there and then analyzed in either +merge-ort or merge-recursive for cases where combinations of renames +indicate that a full directory has been renamed. Scope of abilities ------------------ @@ -88,9 +88,11 @@ directory rename detection support in: Folks have requested in the past that `git diff` detect directory renames and somehow simplify its output. It is not clear whether this would be desirable or how the output should be simplified, so this was - simply not implemented. Further, to implement this, directory rename - detection logic would need to move from merge-recursive to - diffcore-rename. + simply not implemented. Also, while diffcore-rename has most of the + logic for detecting directory renames, some of the logic is still found + within merge-ort and merge-recursive. Fully supporting directory + rename detection in diffs would require copying or moving the remaining + bits of logic to the diff machinery. * am diff --git a/Documentation/technical/hash-function-transition.txt b/Documentation/technical/hash-function-transition.txt index 6fd20ebbc2..260224b033 100644 --- a/Documentation/technical/hash-function-transition.txt +++ b/Documentation/technical/hash-function-transition.txt @@ -33,16 +33,9 @@ researchers. On 23 February 2017 the SHAttered attack Git v2.13.0 and later subsequently moved to a hardened SHA-1 implementation by default, which isn't vulnerable to the SHAttered -attack. +attack, but SHA-1 is still weak. -Thus Git has in effect already migrated to a new hash that isn't SHA-1 -and doesn't share its vulnerabilities, its new hash function just -happens to produce exactly the same output for all known inputs, -except two PDFs published by the SHAttered researchers, and the new -implementation (written by those researchers) claims to detect future -cryptanalytic collision attacks. - -Regardless, it's considered prudent to move past any variant of SHA-1 +Thus it's considered prudent to move past any variant of SHA-1 to a new hash. There's no guarantee that future attacks on SHA-1 won't be published in the future, and those attacks may not have viable mitigations. @@ -57,6 +50,38 @@ SHA-1 still possesses the other properties such as fast object lookup and safe error checking, but other hash functions are equally suitable that are believed to be cryptographically secure. +Choice of Hash +-------------- +The hash to replace the hardened SHA-1 should be stronger than SHA-1 +was: we would like it to be trustworthy and useful in practice for at +least 10 years. + +Some other relevant properties: + +1. A 256-bit hash (long enough to match common security practice; not + excessively long to hurt performance and disk usage). + +2. High quality implementations should be widely available (e.g., in + OpenSSL and Apple CommonCrypto). + +3. The hash function's properties should match Git's needs (e.g. Git + requires collision and 2nd preimage resistance and does not require + length extension resistance). + +4. As a tiebreaker, the hash should be fast to compute (fortunately + many contenders are faster than SHA-1). + +There were several contenders for a successor hash to SHA-1, including +SHA-256, SHA-512/256, SHA-256x16, K12, and BLAKE2bp-256. + +In late 2018 the project picked SHA-256 as its successor hash. + +See 0ed8d8da374 (doc hash-function-transition: pick SHA-256 as +NewHash, 2018-08-04) and numerous mailing list threads at the time, +particularly the one starting at +https://lore.kernel.org/git/20180609224913.GC38834@genre.crustytoothpaste.net/ +for more information. + Goals ----- 1. The transition to SHA-256 can be done one local repository at a time. @@ -94,7 +119,7 @@ Overview -------- We introduce a new repository format extension. Repositories with this extension enabled use SHA-256 instead of SHA-1 to name their objects. -This affects both object names and object content --- both the names +This affects both object names and object content -- both the names of objects and all references to other objects within an object are switched to the new hash function. @@ -107,7 +132,7 @@ mapping to allow naming objects using either their SHA-1 and SHA-256 names interchangeably. "git cat-file" and "git hash-object" gain options to display an object -in its sha1 form and write an object given its sha1 form. This +in its SHA-1 form and write an object given its SHA-1 form. This requires all objects referenced by that object to be present in the object database so that they can be named using the appropriate name (using the bidirectional hash mapping). @@ -115,7 +140,7 @@ object database so that they can be named using the appropriate name Fetches from a SHA-1 based server convert the fetched objects into SHA-256 form and record the mapping in the bidirectional mapping table (see below for details). Pushes to a SHA-1 based server convert the -objects being pushed into sha1 form so the server does not have to be +objects being pushed into SHA-1 form so the server does not have to be aware of the hash function the client is using. Detailed Design @@ -151,38 +176,38 @@ repository extensions. Object names ~~~~~~~~~~~~ -Objects can be named by their 40 hexadecimal digit sha1-name or 64 -hexadecimal digit sha256-name, plus names derived from those (see +Objects can be named by their 40 hexadecimal digit SHA-1 name or 64 +hexadecimal digit SHA-256 name, plus names derived from those (see gitrevisions(7)). -The sha1-name of an object is the SHA-1 of the concatenation of its -type, length, a nul byte, and the object's sha1-content. This is the +The SHA-1 name of an object is the SHA-1 of the concatenation of its +type, length, a nul byte, and the object's SHA-1 content. This is the traditional <sha1> used in Git to name objects. -The sha256-name of an object is the SHA-256 of the concatenation of its -type, length, a nul byte, and the object's sha256-content. +The SHA-256 name of an object is the SHA-256 of the concatenation of its +type, length, a nul byte, and the object's SHA-256 content. Object format ~~~~~~~~~~~~~ The content as a byte sequence of a tag, commit, or tree object named -by sha1 and sha256 differ because an object named by sha256-name refers to -other objects by their sha256-names and an object named by sha1-name -refers to other objects by their sha1-names. +by SHA-1 and SHA-256 differ because an object named by SHA-256 name refers to +other objects by their SHA-256 names and an object named by SHA-1 name +refers to other objects by their SHA-1 names. -The sha256-content of an object is the same as its sha1-content, except -that objects referenced by the object are named using their sha256-names -instead of sha1-names. Because a blob object does not refer to any -other object, its sha1-content and sha256-content are the same. +The SHA-256 content of an object is the same as its SHA-1 content, except +that objects referenced by the object are named using their SHA-256 names +instead of SHA-1 names. Because a blob object does not refer to any +other object, its SHA-1 content and SHA-256 content are the same. -The format allows round-trip conversion between sha256-content and -sha1-content. +The format allows round-trip conversion between SHA-256 content and +SHA-1 content. Object storage ~~~~~~~~~~~~~~ Loose objects use zlib compression and packed objects use the packed format described in Documentation/technical/pack-format.txt, just like -today. The content that is compressed and stored uses sha256-content -instead of sha1-content. +today. The content that is compressed and stored uses SHA-256 content +instead of SHA-1 content. Pack index ~~~~~~~~~~ @@ -191,21 +216,21 @@ hash functions. They have the following format (all integers are in network byte order): - A header appears at the beginning and consists of the following: - - The 4-byte pack index signature: '\377t0c' - - 4-byte version number: 3 - - 4-byte length of the header section, including the signature and + * The 4-byte pack index signature: '\377t0c' + * 4-byte version number: 3 + * 4-byte length of the header section, including the signature and version number - - 4-byte number of objects contained in the pack - - 4-byte number of object formats in this pack index: 2 - - For each object format: - - 4-byte format identifier (e.g., 'sha1' for SHA-1) - - 4-byte length in bytes of shortened object names. This is the + * 4-byte number of objects contained in the pack + * 4-byte number of object formats in this pack index: 2 + * For each object format: + ** 4-byte format identifier (e.g., 'sha1' for SHA-1) + ** 4-byte length in bytes of shortened object names. This is the shortest possible length needed to make names in the shortened object name table unambiguous. - - 4-byte integer, recording where tables relating to this format + ** 4-byte integer, recording where tables relating to this format are stored in this index file, as an offset from the beginning. - - 4-byte offset to the trailer from the beginning of this file. - - Zero or more additional key/value pairs (4-byte key, 4-byte + * 4-byte offset to the trailer from the beginning of this file. + * Zero or more additional key/value pairs (4-byte key, 4-byte value). Only one key is supported: 'PSRC'. See the "Loose objects and unreachable objects" section for supported values and how this is used. All other keys are reserved. Readers must ignore @@ -213,37 +238,36 @@ network byte order): - Zero or more NUL bytes. This can optionally be used to improve the alignment of the full object name table below. - Tables for the first object format: - - A sorted table of shortened object names. These are prefixes of + * A sorted table of shortened object names. These are prefixes of the names of all objects in this pack file, packed together without offset values to reduce the cache footprint of the binary search for a specific object name. - - A table of full object names in pack order. This allows resolving + * A table of full object names in pack order. This allows resolving a reference to "the nth object in the pack file" (from a reachability bitmap or from the next table of another object format) to its object name. - - A table of 4-byte values mapping object name order to pack order. + * A table of 4-byte values mapping object name order to pack order. For an object in the table of sorted shortened object names, the value at the corresponding index in this table is the index in the previous table for that same object. - This can be used to look up the object in reachability bitmaps or to look up its name in another object format. - - A table of 4-byte CRC32 values of the packed object data, in the + * A table of 4-byte CRC32 values of the packed object data, in the order that the objects appear in the pack file. This is to allow compressed data to be copied directly from pack to pack during repacking without undetected data corruption. - - A table of 4-byte offset values. For an object in the table of + * A table of 4-byte offset values. For an object in the table of sorted shortened object names, the value at the corresponding index in this table indicates where that object can be found in the pack file. These are usually 31-bit pack file offsets, but large offsets are encoded as an index into the next table with the most significant bit set. - - A table of 8-byte offset entries (empty for pack files less than + * A table of 8-byte offset entries (empty for pack files less than 2 GiB). Pack files are organized with heavily used objects toward the front, so most object references should not need to refer to this table. @@ -252,10 +276,10 @@ network byte order): up to and not including the table of CRC32 values. - Zero or more NUL bytes. - The trailer consists of the following: - - A copy of the 20-byte SHA-256 checksum at the end of the + * A copy of the 20-byte SHA-256 checksum at the end of the corresponding packfile. - - 20-byte SHA-256 checksum of all of the above. + * 20-byte SHA-256 checksum of all of the above. Loose object index ~~~~~~~~~~~~~~~~~~ @@ -288,18 +312,18 @@ To remove entries (e.g. in "git pack-refs" or "git-prune"): Translation table ~~~~~~~~~~~~~~~~~ -The index files support a bidirectional mapping between sha1-names -and sha256-names. The lookup proceeds similarly to ordinary object -lookups. For example, to convert a sha1-name to a sha256-name: +The index files support a bidirectional mapping between SHA-1 names +and SHA-256 names. The lookup proceeds similarly to ordinary object +lookups. For example, to convert a SHA-1 name to a SHA-256 name: 1. Look for the object in idx files. If a match is present in the - idx's sorted list of truncated sha1-names, then: - a. Read the corresponding entry in the sha1-name order to pack + idx's sorted list of truncated SHA-1 names, then: + a. Read the corresponding entry in the SHA-1 name order to pack name order mapping. - b. Read the corresponding entry in the full sha1-name table to + b. Read the corresponding entry in the full SHA-1 name table to verify we found the right object. If it is, then - c. Read the corresponding entry in the full sha256-name table. - That is the object's sha256-name. + c. Read the corresponding entry in the full SHA-256 name table. + That is the object's SHA-256 name. 2. Check for a loose object. Read lines from loose-object-idx until we find a match. @@ -313,10 +337,10 @@ Since all operations that make new objects (e.g., "git commit") add the new objects to the corresponding index, this mapping is possible for all objects in the object store. -Reading an object's sha1-content -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The sha1-content of an object can be read by converting all sha256-names -its sha256-content references to sha1-names using the translation table. +Reading an object's SHA-1 content +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The SHA-1 content of an object can be read by converting all SHA-256 names +of its SHA-256 content references to SHA-1 names using the translation table. Fetch ~~~~~ @@ -339,7 +363,7 @@ the following steps: 1. index-pack: inflate each object in the packfile and compute its SHA-1. Objects can contain deltas in OBJ_REF_DELTA format against objects the client has locally. These objects can be looked up - using the translation table and their sha1-content read as + using the translation table and their SHA-1 content read as described above to resolve the deltas. 2. topological sort: starting at the "want"s from the negotiation phase, walk through objects in the pack and emit a list of them, @@ -348,12 +372,12 @@ the following steps: (This list only contains objects reachable from the "wants". If the pack from the server contained additional extraneous objects, then they will be discarded.) -3. convert to sha256: open a new (sha256) packfile. Read the topologically +3. convert to SHA-256: open a new SHA-256 packfile. Read the topologically sorted list just generated. For each object, inflate its - sha1-content, convert to sha256-content, and write it to the sha256 - pack. Record the new sha1<->sha256 mapping entry for use in the idx. + SHA-1 content, convert to SHA-256 content, and write it to the SHA-256 + pack. Record the new SHA-1<-->SHA-256 mapping entry for use in the idx. 4. sort: reorder entries in the new pack to match the order of objects - in the pack the server generated and include blobs. Write a sha256 idx + in the pack the server generated and include blobs. Write a SHA-256 idx file 5. clean up: remove the SHA-1 based pack file, index, and topologically sorted list obtained from the server in steps 1 @@ -378,19 +402,20 @@ experimenting to get this to perform well. Push ~~~~ Push is simpler than fetch because the objects referenced by the -pushed objects are already in the translation table. The sha1-content +pushed objects are already in the translation table. The SHA-1 content of each object being pushed can be read as described in the "Reading -an object's sha1-content" section to generate the pack written by git +an object's SHA-1 content" section to generate the pack written by git send-pack. Signed Commits ~~~~~~~~~~~~~~ We add a new field "gpgsig-sha256" to the commit object format to allow signing commits without relying on SHA-1. It is similar to the -existing "gpgsig" field. Its signed payload is the sha256-content of the +existing "gpgsig" field. Its signed payload is the SHA-256 content of the commit object with any "gpgsig" and "gpgsig-sha256" fields removed. This means commits can be signed + 1. using SHA-1 only, as in existing signed commit objects 2. using both SHA-1 and SHA-256, by using both gpgsig-sha256 and gpgsig fields. @@ -404,10 +429,11 @@ Signed Tags ~~~~~~~~~~~ We add a new field "gpgsig-sha256" to the tag object format to allow signing tags without relying on SHA-1. Its signed payload is the -sha256-content of the tag with its gpgsig-sha256 field and "-----BEGIN PGP +SHA-256 content of the tag with its gpgsig-sha256 field and "-----BEGIN PGP SIGNATURE-----" delimited in-body signature removed. This means tags can be signed + 1. using SHA-1 only, as in existing signed tag objects 2. using both SHA-1 and SHA-256, by using gpgsig-sha256 and an in-body signature. @@ -415,11 +441,11 @@ This means tags can be signed Mergetag embedding ~~~~~~~~~~~~~~~~~~ -The mergetag field in the sha1-content of a commit contains the -sha1-content of a tag that was merged by that commit. +The mergetag field in the SHA-1 content of a commit contains the +SHA-1 content of a tag that was merged by that commit. -The mergetag field in the sha256-content of the same commit contains the -sha256-content of the same tag. +The mergetag field in the SHA-256 content of the same commit contains the +SHA-256 content of the same tag. Submodules ~~~~~~~~~~ @@ -494,7 +520,7 @@ Caveats ------- Invalid objects ~~~~~~~~~~~~~~~ -The conversion from sha1-content to sha256-content retains any +The conversion from SHA-1 content to SHA-256 content retains any brokenness in the original object (e.g., tree entry modes encoded with leading 0, tree objects whose paths are not sorted correctly, and commit objects without an author or committer). This is a deliberate @@ -513,15 +539,15 @@ allow lifting this restriction. Alternates ~~~~~~~~~~ -For the same reason, a sha256 repository cannot borrow objects from a -sha1 repository using objects/info/alternates or +For the same reason, a SHA-256 repository cannot borrow objects from a +SHA-1 repository using objects/info/alternates or $GIT_ALTERNATE_OBJECT_REPOSITORIES. git notes ~~~~~~~~~ -The "git notes" tool annotates objects using their sha1-name as key. +The "git notes" tool annotates objects using their SHA-1 name as key. This design does not describe a way to migrate notes trees to use -sha256-names. That migration is expected to happen separately (for +SHA-256 names. That migration is expected to happen separately (for example using a file at the root of the notes tree to describe which hash it uses). @@ -555,7 +581,7 @@ unclear: Git 2.12 -Does this mean Git v2.12.0 is the commit with sha1-name +Does this mean Git v2.12.0 is the commit with SHA-1 name e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7 or the commit with new-40-digit-hash-name e7e07d5a4fcc2a203d9873968ad3e6bd4d7419d7? @@ -573,7 +599,7 @@ supports four different modes of operation: convert any object names written to output to SHA-1, but store objects using SHA-256. This allows users to test the code with no visible behavior change except for performance. This allows - allows running even tests that assume the SHA-1 hash function, to + running even tests that assume the SHA-1 hash function, to sanity-check the behavior of the new mode. 2. ("early transition") Allow both SHA-1 and SHA-256 object names in @@ -598,44 +624,12 @@ The user can also explicitly specify which format to use for a particular revision specifier and for output, overriding the mode. For example: -git --output-format=sha1 log abac87a^{sha1}..f787cac^{sha256} - -Choice of Hash --------------- -In early 2005, around the time that Git was written, Xiaoyun Wang, -Yiqun Lisa Yin, and Hongbo Yu announced an attack finding SHA-1 -collisions in 2^69 operations. In August they published details. -Luckily, no practical demonstrations of a collision in full SHA-1 were -published until 10 years later, in 2017. - -Git v2.13.0 and later subsequently moved to a hardened SHA-1 -implementation by default that mitigates the SHAttered attack, but -SHA-1 is still believed to be weak. - -The hash to replace this hardened SHA-1 should be stronger than SHA-1 -was: we would like it to be trustworthy and useful in practice for at -least 10 years. - -Some other relevant properties: - -1. A 256-bit hash (long enough to match common security practice; not - excessively long to hurt performance and disk usage). - -2. High quality implementations should be widely available (e.g., in - OpenSSL and Apple CommonCrypto). - -3. The hash function's properties should match Git's needs (e.g. Git - requires collision and 2nd preimage resistance and does not require - length extension resistance). - -4. As a tiebreaker, the hash should be fast to compute (fortunately - many contenders are faster than SHA-1). - -We choose SHA-256. + git --output-format=sha1 log abac87a^{sha1}..f787cac^{sha256} Transition plan --------------- Some initial steps can be implemented independently of one another: + - adding a hash function API (vtable) - teaching fsck to tolerate the gpgsig-sha256 field - excluding gpgsig-* from the fields copied by "git commit --amend" @@ -647,9 +641,9 @@ Some initial steps can be implemented independently of one another: - introducing index v3 - adding support for the PSRC field and safer object pruning - The first user-visible change is the introduction of the objectFormat extension (without compatObjectFormat). This requires: + - teaching fsck about this mode of operation - using the hash function API (vtable) when computing object names - signing objects and verifying signatures @@ -657,6 +651,7 @@ extension (without compatObjectFormat). This requires: repository Next comes introduction of compatObjectFormat: + - implementing the loose-object-idx - translating object names between object formats - translating object content between object formats @@ -669,10 +664,11 @@ Next comes introduction of compatObjectFormat: "Object names on the command line" above) The next step is supporting fetches and pushes to SHA-1 repositories: + - allow pushes to a repository using the compat format - generate a topologically sorted list of the SHA-1 names of fetched objects -- convert the fetched packfile to sha256 format and generate an idx +- convert the fetched packfile to SHA-256 format and generate an idx file - re-sort to match the order of objects in the fetched packfile @@ -734,6 +730,7 @@ Using hash functions in parallel Objects newly created would be addressed by the new hash, but inside such an object (e.g. commit) it is still possible to address objects using the old hash function. + * You cannot trust its history (needed for bisectability) in the future without further work * Maintenance burden as the number of supported hash functions grows @@ -743,36 +740,38 @@ using the old hash function. Signed objects with multiple hashes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instead of introducing the gpgsig-sha256 field in commit and tag objects -for sha256-content based signatures, an earlier version of this design -added "hash sha256 <sha256-name>" fields to strengthen the existing -sha1-content based signatures. +for SHA-256 content based signatures, an earlier version of this design +added "hash sha256 <SHA-256 name>" fields to strengthen the existing +SHA-1 content based signatures. In other words, a single signature was used to attest to the object content using both hash functions. This had some advantages: + * Using one signature instead of two speeds up the signing process. * Having one signed payload with both hashes allows the signer to - attest to the sha1-name and sha256-name referring to the same object. + attest to the SHA-1 name and SHA-256 name referring to the same object. * All users consume the same signature. Broken signatures are likely to be detected quickly using current versions of git. However, it also came with disadvantages: -* Verifying a signed object requires access to the sha1-names of all + +* Verifying a signed object requires access to the SHA-1 names of all objects it references, even after the transition is complete and translation table is no longer needed for anything else. To support - this, the design added fields such as "hash sha1 tree <sha1-name>" - and "hash sha1 parent <sha1-name>" to the sha256-content of a signed + this, the design added fields such as "hash sha1 tree <SHA-1 name>" + and "hash sha1 parent <SHA-1 name>" to the SHA-256 content of a signed commit, complicating the conversion process. -* Allowing signed objects without a sha1 (for after the transition is +* Allowing signed objects without a SHA-1 (for after the transition is complete) complicated the design further, requiring a "nohash sha1" - field to suppress including "hash sha1" fields in the sha256-content + field to suppress including "hash sha1" fields in the SHA-256 content and signed payload. Lazily populated translation table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some of the work of building the translation table could be deferred to push time, but that would significantly complicate and slow down pushes. -Calculating the sha1-name at object creation time at the same time it is -being streamed to disk and having its sha256-name calculated should be +Calculating the SHA-1 name at object creation time at the same time it is +being streamed to disk and having its SHA-256 name calculated should be an acceptable cost. Document History @@ -782,18 +781,19 @@ Document History bmwill@google.com, jonathantanmy@google.com, jrnieder@gmail.com, sbeller@google.com -Initial version sent to -http://lore.kernel.org/git/20170304011251.GA26789@aiede.mtv.corp.google.com +* Initial version sent to https://lore.kernel.org/git/20170304011251.GA26789@aiede.mtv.corp.google.com 2017-03-03 jrnieder@gmail.com Incorporated suggestions from jonathantanmy and sbeller: -* describe purpose of signed objects with each hash type -* redefine signed object verification using object content under the + +* Describe purpose of signed objects with each hash type +* Redefine signed object verification using object content under the first hash function 2017-03-06 jrnieder@gmail.com + * Use SHA3-256 instead of SHA2 (thanks, Linus and brian m. carlson).[1][2] -* Make sha3-based signatures a separate field, avoiding the need for +* Make SHA3-based signatures a separate field, avoiding the need for "hash" and "nohash" fields (thanks to peff[3]). * Add a sorting phase to fetch (thanks to Junio for noticing the need for this). @@ -805,23 +805,26 @@ Incorporated suggestions from jonathantanmy and sbeller: especially Junio). 2017-09-27 jrnieder@gmail.com, sbeller@google.com -* use placeholder NewHash instead of SHA3-256 -* describe criteria for picking a hash function. -* include a transition plan (thanks especially to Brandon Williams + +* Use placeholder NewHash instead of SHA3-256 +* Describe criteria for picking a hash function. +* Include a transition plan (thanks especially to Brandon Williams for fleshing these ideas out) -* define the translation table (thanks, Shawn Pearce[5], Jonathan +* Define the translation table (thanks, Shawn Pearce[5], Jonathan Tan, and Masaya Suzuki) -* avoid loose object overhead by packing more aggressively in +* Avoid loose object overhead by packing more aggressively in "git gc --auto" Later history: - See the history of this file in git.git for the history of subsequent - edits. This document history is no longer being maintained as it - would now be superfluous to the commit log +* See the history of this file in git.git for the history of subsequent + edits. This document history is no longer being maintained as it + would now be superfluous to the commit log + +References: -[1] http://lore.kernel.org/git/CA+55aFzJtejiCjV0e43+9oR3QuJK2PiFiLQemytoLpyJWe6P9w@mail.gmail.com/ -[2] http://lore.kernel.org/git/CA+55aFz+gkAsDZ24zmePQuEs1XPS9BP_s8O7Q4wQ7LV7X5-oDA@mail.gmail.com/ -[3] http://lore.kernel.org/git/20170306084353.nrns455dvkdsfgo5@sigill.intra.peff.net/ -[4] http://lore.kernel.org/git/20170304224936.rqqtkdvfjgyezsht@genre.crustytoothpaste.net -[5] https://lore.kernel.org/git/CAJo=hJtoX9=AyLHHpUJS7fueV9ciZ_MNpnEPHUz8Whui6g9F0A@mail.gmail.com/ + [1] https://lore.kernel.org/git/CA+55aFzJtejiCjV0e43+9oR3QuJK2PiFiLQemytoLpyJWe6P9w@mail.gmail.com/ + [2] https://lore.kernel.org/git/CA+55aFz+gkAsDZ24zmePQuEs1XPS9BP_s8O7Q4wQ7LV7X5-oDA@mail.gmail.com/ + [3] https://lore.kernel.org/git/20170306084353.nrns455dvkdsfgo5@sigill.intra.peff.net/ + [4] https://lore.kernel.org/git/20170304224936.rqqtkdvfjgyezsht@genre.crustytoothpaste.net + [5] https://lore.kernel.org/git/CAJo=hJtoX9=AyLHHpUJS7fueV9ciZ_MNpnEPHUz8Whui6g9F0A@mail.gmail.com/ diff --git a/Documentation/technical/http-protocol.txt b/Documentation/technical/http-protocol.txt index 96d89ea9b2..cc5126cfed 100644 --- a/Documentation/technical/http-protocol.txt +++ b/Documentation/technical/http-protocol.txt @@ -225,6 +225,9 @@ The client may send Extra Parameters (see Documentation/technical/pack-protocol.txt) as a colon-separated string in the Git-Protocol HTTP header. +Uses the `--http-backend-info-refs` option to +linkgit:git-upload-pack[1]. + Dumb Server Response ^^^^^^^^^^^^^^^^^^^^ Dumb servers MUST respond with the dumb server reply format. diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index 69edf46c03..65da0daaa5 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -26,7 +26,7 @@ Git index format Extensions are identified by signature. Optional extensions can be ignored if Git does not understand them. - Git currently supports cached tree and resolve undo extensions. + Git currently supports cache tree and resolve undo extensions. 4-byte extension signature. If the first byte is 'A'..'Z' the extension is optional and can be ignored. @@ -44,6 +44,13 @@ Git index format localization, no special casing of directory separator '/'). Entries with the same name are sorted by their stage field. + An index entry typically represents a file. However, if sparse-checkout + is enabled in cone mode (`core.sparseCheckoutCone` is enabled) and the + `extensions.sparseIndex` extension is enabled, then the index may + contain entries for directories outside of the sparse-checkout definition. + These entries have mode `040000`, include the `SKIP_WORKTREE` bit, and + the path ends in a directory separator. + 32-bit ctime seconds, the last time a file's metadata changed this is stat(2) data @@ -136,14 +143,35 @@ Git index format == Extensions -=== Cached tree - - Cached tree extension contains pre-computed hashes for trees that can - be derived from the index. It helps speed up tree object generation - from index for a new commit. - - When a path is updated in index, the path must be invalidated and - removed from tree cache. +=== Cache tree + + Since the index does not record entries for directories, the cache + entries cannot describe tree objects that already exist in the object + database for regions of the index that are unchanged from an existing + commit. The cache tree extension stores a recursive tree structure that + describes the trees that already exist and completely match sections of + the cache entries. This speeds up tree object generation from the index + for a new commit by only computing the trees that are "new" to that + commit. It also assists when comparing the index to another tree, such + as `HEAD^{tree}`, since sections of the index can be skipped when a tree + comparison demonstrates equality. + + The recursive tree structure uses nodes that store a number of cache + entries, a list of subnodes, and an object ID (OID). The OID references + the existing tree for that node, if it is known to exist. The subnodes + correspond to subdirectories that themselves have cache tree nodes. The + number of cache entries corresponds to the number of cache entries in + the index that describe paths within that tree's directory. + + The extension tracks the full directory structure in the cache tree + extension, but this is generally smaller than the full cache entry list. + + When a path is updated in index, Git invalidates all nodes of the + recursive cache tree corresponding to the parent directories of that + path. We store these tree nodes as being "invalid" by using "-1" as the + number of cache entries. Invalid nodes still store a span of index + entries, allowing Git to focus its efforts when reconstructing a full + cache tree. The signature for this extension is { 'T', 'R', 'E', 'E' }. @@ -174,7 +202,8 @@ Git index format first entry represents the root level of the repository, followed by the first subtree--let's call this A--of the root level (with its name relative to the root level), followed by the first subtree of A (with - its name relative to A), ... + its name relative to A), and so on. The specified number of subtrees + indicates when the current level of the recursive stack is complete. === Resolve undo @@ -251,14 +280,14 @@ Git index format - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from ctime field until "file size". - - Stat data of core.excludesfile + - Stat data of core.excludesFile - 32-bit dir_flags (see struct dir_struct) - Hash of $GIT_DIR/info/exclude. A null hash means the file does not exist. - - Hash of core.excludesfile. A null hash means the file does + - Hash of core.excludesFile. A null hash means the file does not exist. - NUL-terminated string of per-dir exclude file name. This usually @@ -363,3 +392,15 @@ The remaining data of each directory block is grouped by type: in this block of entries. - 32-bit count of cache entries in this block + +== Sparse Directory Entries + + When using sparse-checkout in cone mode, some entire directories within + the index can be summarized by pointing to a tree object instead of the + entire expanded list of paths within that tree. An index containing such + entries is a "sparse index". Index format versions 4 and less were not + implemented with such entries in mind. Thus, for these versions, an + index containing sparse directory entries will include this extension + with signature { 's', 'd', 'i', 'r' }. Like the split-index extension, + tools should avoid interacting with a sparse index unless they understand + this extension. diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt index ddfceece64..86f40f2490 100644 --- a/Documentation/technical/multi-pack-index.txt +++ b/Documentation/technical/multi-pack-index.txt @@ -45,8 +45,9 @@ Design Details a change in format. - The MIDX keeps only one record per object ID. If an object appears - in multiple packfiles, then the MIDX selects the copy in the most- - recently modified packfile. + in multiple packfiles, then the MIDX selects the copy in the + preferred packfile, otherwise selecting from the most-recently + modified packfile. - If there exist packfiles in the pack directory not registered in the MIDX, then those packfiles are loaded into the `packed_git` @@ -72,14 +73,10 @@ Future Work still reducing the number of binary searches required for object lookups. -- The reachability bitmap is currently paired directly with a single - packfile, using the pack-order as the object order to hopefully - compress the bitmaps well using run-length encoding. This could be - extended to pair a reachability bitmap with a multi-pack-index. If - the multi-pack-index is extended to store a "stable object order" +- If the multi-pack-index is extended to store a "stable object order" (a function Order(hash) = integer that is constant for a given hash, - even as the multi-pack-index is updated) then a reachability bitmap - could point to a multi-pack-index and be updated independently. + even as the multi-pack-index is updated) then MIDX bitmaps could be + updated independently of the MIDX. - Packfiles can be marked as "special" using empty files that share the initial name but replace ".pack" with ".keep" or ".promisor". diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 96d2fc589f..8d2f42f29e 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -274,6 +274,26 @@ Pack file entry: <+ Index checksum of all of the above. +== pack-*.rev files have the format: + + - A 4-byte magic number '0x52494458' ('RIDX'). + + - A 4-byte version identifier (= 1). + + - A 4-byte hash function identifier (= 1 for SHA-1, 2 for SHA-256). + + - A table of index positions (one per packed object, num_objects in + total, each a 4-byte unsigned integer in network order), sorted by + their corresponding offsets in the packfile. + + - A trailer, containing a: + + checksum of the corresponding packfile, and + + a checksum of all of the above. + +All 4-byte numbers are in network order. + == multi-pack-index (MIDX) files have the following format: The multi-pack-index files refer to multiple pack-files and loose objects. @@ -316,6 +336,9 @@ CHUNK LOOKUP: (Chunks are provided in file-order, so you can infer the length using the next chunk position if necessary.) + The CHUNK LOOKUP matches the table of contents from + link:technical/chunk-format.html[the chunk-based file format]. + The remaining data in the body is described one chunk at a time, and these chunks may be given in any order. Chunks are required unless otherwise specified. @@ -356,3 +379,86 @@ CHUNK DATA: TRAILER: Index checksum of the above contents. + +== multi-pack-index reverse indexes + +Similar to the pack-based reverse index, the multi-pack index can also +be used to generate a reverse index. + +Instead of mapping between offset, pack-, and index position, this +reverse index maps between an object's position within the MIDX, and +that object's position within a pseudo-pack that the MIDX describes +(i.e., the ith entry of the multi-pack reverse index holds the MIDX +position of ith object in pseudo-pack order). + +To clarify the difference between these orderings, consider a multi-pack +reachability bitmap (which does not yet exist, but is what we are +building towards here). Each bit needs to correspond to an object in the +MIDX, and so we need an efficient mapping from bit position to MIDX +position. + +One solution is to let bits occupy the same position in the oid-sorted +index stored by the MIDX. But because oids are effectively random, their +resulting reachability bitmaps would have no locality, and thus compress +poorly. (This is the reason that single-pack bitmaps use the pack +ordering, and not the .idx ordering, for the same purpose.) + +So we'd like to define an ordering for the whole MIDX based around +pack ordering, which has far better locality (and thus compresses more +efficiently). We can think of a pseudo-pack created by the concatenation +of all of the packs in the MIDX. E.g., if we had a MIDX with three packs +(a, b, c), with 10, 15, and 20 objects respectively, we can imagine an +ordering of the objects like: + + |a,0|a,1|...|a,9|b,0|b,1|...|b,14|c,0|c,1|...|c,19| + +where the ordering of the packs is defined by the MIDX's pack list, +and then the ordering of objects within each pack is the same as the +order in the actual packfile. + +Given the list of packs and their counts of objects, you can +naïvely reconstruct that pseudo-pack ordering (e.g., the object at +position 27 must be (c,1) because packs "a" and "b" consumed 25 of the +slots). But there's a catch. Objects may be duplicated between packs, in +which case the MIDX only stores one pointer to the object (and thus we'd +want only one slot in the bitmap). + +Callers could handle duplicates themselves by reading objects in order +of their bit-position, but that's linear in the number of objects, and +much too expensive for ordinary bitmap lookups. Building a reverse index +solves this, since it is the logical inverse of the index, and that +index has already removed duplicates. But, building a reverse index on +the fly can be expensive. Since we already have an on-disk format for +pack-based reverse indexes, let's reuse it for the MIDX's pseudo-pack, +too. + +Objects from the MIDX are ordered as follows to string together the +pseudo-pack. Let `pack(o)` return the pack from which `o` was selected +by the MIDX, and define an ordering of packs based on their numeric ID +(as stored by the MIDX). Let `offset(o)` return the object offset of `o` +within `pack(o)`. Then, compare `o1` and `o2` as follows: + + - If one of `pack(o1)` and `pack(o2)` is preferred and the other + is not, then the preferred one sorts first. ++ +(This is a detail that allows the MIDX bitmap to determine which +pack should be used by the pack-reuse mechanism, since it can ask +the MIDX for the pack containing the object at bit position 0). + + - If `pack(o1) ≠pack(o2)`, then sort the two objects in descending + order based on the pack ID. + + - Otherwise, `pack(o1) = pack(o2)`, and the objects are sorted in + pack-order (i.e., `o1` sorts ahead of `o2` exactly when `offset(o1) + < offset(o2)`). + +In short, a MIDX's pseudo-pack is the de-duplicated concatenation of +objects in packs stored by the MIDX, laid out in pack order, and the +packs arranged in MIDX order (with the preferred pack coming first). + +Finally, note that the MIDX's reverse index is not stored as a chunk in +the multi-pack-index itself. This is done because the reverse index +includes the checksum of the pack or MIDX to which it belongs, which +makes it impossible to write in the MIDX. To avoid races when rewriting +the MIDX, a MIDX reverse index includes the MIDX's checksum in its +filename (e.g., `multi-pack-index-xyz.rev`). diff --git a/Documentation/technical/packfile-uri.txt b/Documentation/technical/packfile-uri.txt index f7eabc6c76..1eb525fe76 100644 --- a/Documentation/technical/packfile-uri.txt +++ b/Documentation/technical/packfile-uri.txt @@ -35,13 +35,14 @@ include some sort of non-trivial implementation in the Minimum Viable Product, at least so that we can test the client. This is the implementation: a feature, marked experimental, that allows the -server to be configured by one or more `uploadpack.blobPackfileUri=<sha1> -<uri>` entries. Whenever the list of objects to be sent is assembled, all such -blobs are excluded, replaced with URIs. As noted in "Future work" below, the -server can evolve in the future to support excluding other objects (or other -implementations of servers could be made that support excluding other objects) -without needing a protocol change, so clients should not expect that packfiles -downloaded in this way only contain single blobs. +server to be configured by one or more `uploadpack.blobPackfileUri= +<object-hash> <pack-hash> <uri>` entries. Whenever the list of objects to be +sent is assembled, all such blobs are excluded, replaced with URIs. As noted +in "Future work" below, the server can evolve in the future to support +excluding other objects (or other implementations of servers could be made +that support excluding other objects) without needing a protocol change, so +clients should not expect that packfiles downloaded in this way only contain +single blobs. Client design ------------- diff --git a/Documentation/technical/parallel-checkout.txt b/Documentation/technical/parallel-checkout.txt new file mode 100644 index 0000000000..e790258a1a --- /dev/null +++ b/Documentation/technical/parallel-checkout.txt @@ -0,0 +1,270 @@ +Parallel Checkout Design Notes +============================== + +The "Parallel Checkout" feature attempts to use multiple processes to +parallelize the work of uncompressing the blobs, applying in-core +filters, and writing the resulting contents to the working tree during a +checkout operation. It can be used by all checkout-related commands, +such as `clone`, `checkout`, `reset`, `sparse-checkout`, and others. + +These commands share the following basic structure: + +* Step 1: Read the current index file into memory. + +* Step 2: Modify the in-memory index based upon the command, and + temporarily mark all cache entries that need to be updated. + +* Step 3: Populate the working tree to match the new candidate index. + This includes iterating over all of the to-be-updated cache entries + and delete, create, or overwrite the associated files in the working + tree. + +* Step 4: Write the new index to disk. + +Step 3 is the focus of the "parallel checkout" effort described here. + +Sequential Implementation +------------------------- + +For the purposes of discussion here, the current sequential +implementation of Step 3 is divided in 3 parts, each one implemented in +its own function: + +* Step 3a: `unpack-trees.c:check_updates()` contains a series of + sequential loops iterating over the `cache_entry`'s array. The main + loop in this function calls the Step 3b function for each of the + to-be-updated entries. + +* Step 3b: `entry.c:checkout_entry()` examines the existing working tree + for file conflicts, collisions, and unsaved changes. It removes files + and creates leading directories as necessary. It calls the Step 3c + function for each entry to be written. + +* Step 3c: `entry.c:write_entry()` loads the blob into memory, smudges + it if necessary, creates the file in the working tree, writes the + smudged contents, calls `fstat()` or `lstat()`, and updates the + associated `cache_entry` struct with the stat information gathered. + +It wouldn't be safe to perform Step 3b in parallel, as there could be +race conditions between file creations and removals. Instead, the +parallel checkout framework lets the sequential code handle Step 3b, +and uses parallel workers to replace the sequential +`entry.c:write_entry()` calls from Step 3c. + +Rejected Multi-Threaded Solution +-------------------------------- + +The most "straightforward" implementation would be to spread the set of +to-be-updated cache entries across multiple threads. But due to the +thread-unsafe functions in the ODB code, we would have to use locks to +coordinate the parallel operation. An early prototype of this solution +showed that the multi-threaded checkout would bring performance +improvements over the sequential code, but there was still too much lock +contention. A `perf` profiling indicated that around 20% of the runtime +during a local Linux clone (on an SSD) was spent in locking functions. +For this reason this approach was rejected in favor of using multiple +child processes, which led to a better performance. + +Multi-Process Solution +---------------------- + +Parallel checkout alters the aforementioned Step 3 to use multiple +`checkout--worker` background processes to distribute the work. The +long-running worker processes are controlled by the foreground Git +command using the existing run-command API. + +Overview +~~~~~~~~ + +Step 3b is only slightly altered; for each entry to be checked out, the +main process performs the following steps: + +* M1: Check whether there is any untracked or unclean file in the + working tree which would be overwritten by this entry, and decide + whether to proceed (removing the file(s)) or not. + +* M2: Create the leading directories. + +* M3: Load the conversion attributes for the entry's path. + +* M4: Check, based on the entry's type and conversion attributes, + whether the entry is eligible for parallel checkout (more on this + later). If it is eligible, enqueue the entry and the loaded + attributes to later write the entry in parallel. If not, write the + entry right away, using the default sequential code. + +Note: we save the conversion attributes associated with each entry +because the workers don't have access to the main process' index state, +so they can't load the attributes by themselves (and the attributes are +needed to properly smudge the entry). Additionally, this has a positive +impact on performance as (1) we don't need to load the attributes twice +and (2) the attributes machinery is optimized to handle paths in +sequential order. + +After all entries have passed through the above steps, the main process +checks if the number of enqueued entries is sufficient to spread among +the workers. If not, it just writes them sequentially. Otherwise, it +spawns the workers and distributes the queued entries uniformly in +continuous chunks. This aims to minimize the chances of two workers +writing to the same directory simultaneously, which could increase lock +contention in the kernel. + +Then, for each assigned item, each worker: + +* W1: Checks if there is any non-directory file in the leading part of + the entry's path or if there already exists a file at the entry' path. + If so, mark the entry with `PC_ITEM_COLLIDED` and skip it (more on + this later). + +* W2: Creates the file (with O_CREAT and O_EXCL). + +* W3: Loads the blob into memory (inflating and delta reconstructing + it). + +* W4: Applies any required in-process filter, like end-of-line + conversion and re-encoding. + +* W5: Writes the result to the file descriptor opened at W2. + +* W6: Calls `fstat()` or lstat()` on the just-written path, and sends + the result back to the main process, together with the end status of + the operation and the item's identification number. + +Note that, when possible, steps W3 to W5 are delegated to the streaming +machinery, removing the need to keep the entire blob in memory. + +If the worker fails to read the blob or to write it to the working tree, +it removes the created file to avoid leaving empty files behind. This is +the *only* time a worker is allowed to remove a file. + +As mentioned earlier, it is the responsibility of the main process to +remove any file that blocks the checkout operation (or abort if the +removal(s) would cause data loss and the user didn't ask to `--force`). +This is crucial to avoid race conditions and also to properly detect +path collisions at Step W1. + +After the workers finish writing the items and sending back the required +information, the main process handles the results in two steps: + +- First, it updates the in-memory index with the `lstat()` information + sent by the workers. (This must be done first as this information + might me required in the following step.) + +- Then it writes the items which collided on disk (i.e. items marked + with `PC_ITEM_COLLIDED`). More on this below. + +Path Collisions +--------------- + +Path collisions happen when two different paths correspond to the same +entry in the file system. E.g. the paths 'a' and 'A' would collide in a +case-insensitive file system. + +The sequential checkout deals with collisions in the same way that it +deals with files that were already present in the working tree before +checkout. Basically, it checks if the path that it wants to write +already exists on disk, makes sure the existing file doesn't have +unsaved data, and then overwrites it. (To be more pedantic: it deletes +the existing file and creates the new one.) So, if there are multiple +colliding files to be checked out, the sequential code will write each +one of them but only the last will actually survive on disk. + +Parallel checkout aims to reproduce the same behavior. However, we +cannot let the workers racily write to the same file on disk. Instead, +the workers detect when the entry that they want to check out would +collide with an existing file, and mark it with `PC_ITEM_COLLIDED`. +Later, the main process can sequentially feed these entries back to +`checkout_entry()` without the risk of race conditions. On clone, this +also has the effect of marking the colliding entries to later emit a +warning for the user, like the classic sequential checkout does. + +The workers are able to detect both collisions among the entries being +concurrently written and collisions between a parallel-eligible entry +and an ineligible entry. The general idea for collision detection is +quite straightforward: for each parallel-eligible entry, the main +process must remove all files that prevent this entry from being written +(before enqueueing it). This includes any non-directory file in the +leading path of the entry. Later, when a worker gets assigned the entry, +it looks again for the non-directories files and for an already existing +file at the entry's path. If any of these checks finds something, the +worker knows that there was a path collision. + +Because parallel checkout can distinguish path collisions from the case +where the file was already present in the working tree before checkout, +we could alternatively choose to skip the checkout of colliding entries. +However, each entry that doesn't get written would have NULL `lstat()` +fields on the index. This could cause performance penalties for +subsequent commands that need to refresh the index, as they would have +to go to the file system to see if the entry is dirty. Thus, if we have +N entries in a colliding group and we decide to write and `lstat()` only +one of them, every subsequent `git-status` will have to read, convert, +and hash the written file N - 1 times. By checking out all colliding +entries (like the sequential code does), we only pay the overhead once, +during checkout. + +Eligible Entries for Parallel Checkout +-------------------------------------- + +As previously mentioned, not all entries passed to `checkout_entry()` +will be considered eligible for parallel checkout. More specifically, we +exclude: + +- Symbolic links; to avoid race conditions that, in combination with + path collisions, could cause workers to write files at the wrong + place. For example, if we were to concurrently check out a symlink + 'a' -> 'b' and a regular file 'A/f' in a case-insensitive file system, + we could potentially end up writing the file 'A/f' at 'a/f', due to a + race condition. + +- Regular files that require external filters (either "one shot" filters + or long-running process filters). These filters are black-boxes to Git + and may have their own internal locking or non-concurrent assumptions. + So it might not be safe to run multiple instances in parallel. ++ +Besides, long-running filters may use the delayed checkout feature to +postpone the return of some filtered blobs. The delayed checkout queue +and the parallel checkout queue are not compatible and should remain +separate. ++ +Note: regular files that only require internal filters, like end-of-line +conversion and re-encoding, are eligible for parallel checkout. + +Ineligible entries are checked out by the classic sequential codepath +*before* spawning workers. + +Note: submodules's files are also eligible for parallel checkout (as +long as they don't fall into any of the excluding categories mentioned +above). But since each submodule is checked out in its own child +process, we don't mix the superproject's and the submodules' files in +the same parallel checkout process or queue. + +The API +------- + +The parallel checkout API was designed with the goal of minimizing +changes to the current users of the checkout machinery. This means that +they don't have to call a different function for sequential or parallel +checkout. As already mentioned, `checkout_entry()` will automatically +insert the given entry in the parallel checkout queue when this feature +is enabled and the entry is eligible; otherwise, it will just write the +entry right away, using the sequential code. In general, callers of the +parallel checkout API should look similar to this: + +---------------------------------------------- +int pc_workers, pc_threshold, err = 0; +struct checkout state; + +get_parallel_checkout_configs(&pc_workers, &pc_threshold); + +/* + * This check is not strictly required, but it + * should save some time in sequential mode. + */ +if (pc_workers > 1) + init_parallel_checkout(); + +for (each cache_entry ce to-be-updated) + err |= checkout_entry(ce, &state, NULL, NULL); + +err |= run_parallel_checkout(&state, pc_workers, pc_threshold, NULL, NULL); +---------------------------------------------- diff --git a/Documentation/technical/partial-clone.txt b/Documentation/technical/partial-clone.txt index 0780d30cac..a0dd7c66f2 100644 --- a/Documentation/technical/partial-clone.txt +++ b/Documentation/technical/partial-clone.txt @@ -242,8 +242,7 @@ remote in a specific order. repository and can satisfy all such requests. - Repack essentially treats promisor and non-promisor packfiles as 2 - distinct partitions and does not mix them. Repack currently only works - on non-promisor packfiles and loose objects. + distinct partitions and does not mix them. - Dynamic object fetching invokes fetch-pack once *for each item* because most algorithms stumble upon a missing object and need to have @@ -273,9 +272,6 @@ to use those promisor remotes in that order." The user might want to work in a triangular work flow with multiple promisor remotes that each have an incomplete view of the repository. -- Allow repack to work on promisor packfiles (while keeping them distinct - from non-promisor packfiles). - - Allow non-pathname-based filters to make use of packfile bitmaps (when present). This was just an omission during the initial implementation. diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index 85daeb5d9e..21e8258ccf 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -33,8 +33,8 @@ In protocol v2 these special packets will have the following semantics: * '0000' Flush Packet (flush-pkt) - indicates the end of a message * '0001' Delimiter Packet (delim-pkt) - separates sections of a message - * '0002' Message Packet (response-end-pkt) - indicates the end of a response - for stateless connections + * '0002' Response End Packet (response-end-pkt) - indicates the end of a + response for stateless connections Initial Client Request ---------------------- @@ -42,7 +42,8 @@ Initial Client Request In general a client can request to speak protocol v2 by sending `version=2` through the respective side-channel for the transport being used which inevitably sets `GIT_PROTOCOL`. More information can be -found in `pack-protocol.txt` and `http-protocol.txt`. In all cases the +found in `pack-protocol.txt` and `http-protocol.txt`, as well as the +`GIT_PROTOCOL` definition in `git.txt`. In all cases the response from the server is the capability advertisement. Git Transport @@ -58,6 +59,8 @@ SSH and File Transport When using either the ssh:// or file:// transport, the GIT_PROTOCOL environment variable must be set explicitly to include "version=2". +The server may need to be configured to allow this environment variable +to pass. HTTP Transport ~~~~~~~~~~~~~~ @@ -81,6 +84,12 @@ A v2 server would reply: Subsequent requests are then made directly to the service `$GIT_URL/git-upload-pack`. (This works the same for git-receive-pack). +Uses the `--http-backend-info-refs` option to +linkgit:git-upload-pack[1]. + +The server may need to be configured to pass this header's contents via +the `GIT_PROTOCOL` variable. See the discussion in `git-http-backend.txt`. + Capability Advertisement ------------------------ @@ -190,13 +199,26 @@ ls-refs takes in the following arguments: Show peeled tags. ref-prefix <prefix> When specified, only references having a prefix matching one of - the provided prefixes are displayed. + the provided prefixes are displayed. Multiple instances may be + given, in which case references matching any prefix will be + shown. Note that this is purely for optimization; a server MAY + show refs not matching the prefix if it chooses, and clients + should filter the result themselves. + +If the 'unborn' feature is advertised the following argument can be +included in the client's request. + + unborn + The server will send information about HEAD even if it is a symref + pointing to an unborn branch in the form "unborn HEAD + symref-target:<target>". The output of ls-refs is as follows: output = *ref flush-pkt - ref = PKT-LINE(obj-id SP refname *(SP ref-attribute) LF) + obj-id-or-unborn = (obj-id | "unborn") + ref = PKT-LINE(obj-id-or-unborn SP refname *(SP ref-attribute) LF) ref-attribute = (symref | peeled) symref = "symref-target:" symref-target peeled = "peeled:" obj-id @@ -337,6 +359,14 @@ explained below. client should download from all given URIs. Currently, the protocols supported are "http" and "https". +If the 'wait-for-done' feature is advertised, the following argument +can be included in the client's request. + + wait-for-done + Indicates to the server that it should never send "ready", but + should wait for the client to say "done" before sending the + packfile. + The response of `fetch` is broken into a number of sections separated by delimiter packets (0001), with each section beginning with its section header. Most sections are sent only when the packfile is sent. @@ -505,3 +535,34 @@ packet-line, and must not contain non-printable or whitespace characters. The current implementation uses trace2 session IDs (see link:api-trace2.html[api-trace2] for details), but this may change and users of the session ID should not rely on this fact. + +object-info +~~~~~~~~~~~ + +`object-info` is the command to retrieve information about one or more objects. +Its main purpose is to allow a client to make decisions based on this +information without having to fully fetch objects. Object size is the only +information that is currently supported. + +An `object-info` request takes the following arguments: + + size + Requests size information to be returned for each listed object id. + + oid <oid> + Indicates to the server an object which the client wants to obtain + information for. + +The response of `object-info` is a list of the requested object ids +and associated requested information, each separated by a single space. + + output = info flush-pkt + + info = PKT-LINE(attrs) LF) + *PKT-LINE(obj-info LF) + + attrs = attr | attrs SP attrs + + attr = "size" + + obj-info = obj-id SP obj-size diff --git a/Documentation/technical/reftable.txt b/Documentation/technical/reftable.txt index 8095ab2590..d7c3b645cf 100644 --- a/Documentation/technical/reftable.txt +++ b/Documentation/technical/reftable.txt @@ -872,17 +872,11 @@ A repository must set its `$GIT_DIR/config` to configure reftable: Layout ^^^^^^ -A collection of reftable files are stored in the `$GIT_DIR/reftable/` -directory: - -.... -00000001-00000001.log -00000002-00000002.ref -00000003-00000003.ref -.... - -where reftable files are named by a unique name such as produced by the -function `${min_update_index}-${max_update_index}.ref`. +A collection of reftable files are stored in the `$GIT_DIR/reftable/` directory. +Their names should have a random element, such that each filename is globally +unique; this helps avoid spurious failures on Windows, where open files cannot +be removed or overwritten. It suggested to use +`${min_update_index}-${max_update_index}-${random}.ref` as a naming convention. Log-only files use the `.log` extension, while ref-only and mixed ref and log files use `.ref`. extension. @@ -893,9 +887,9 @@ current files, one per line, in order, from oldest (base) to newest .... $ cat .git/reftable/tables.list -00000001-00000001.log -00000002-00000002.ref -00000003-00000003.ref +00000001-00000001-RANDOM1.log +00000002-00000002-RANDOM2.ref +00000003-00000003-RANDOM3.ref .... Readers must read `$GIT_DIR/reftable/tables.list` to determine which @@ -940,7 +934,7 @@ new reftable and atomically appending it to the stack: 3. Select `update_index` to be most recent file's `max_update_index + 1`. 4. Prepare temp reftable `tmp_XXXXXX`, including log entries. -5. Rename `tmp_XXXXXX` to `${update_index}-${update_index}.ref`. +5. Rename `tmp_XXXXXX` to `${update_index}-${update_index}-${random}.ref`. 6. Copy `tables.list` to `tables.list.lock`, appending file from (5). 7. Rename `tables.list.lock` to `tables.list`. @@ -993,7 +987,7 @@ prevents other processes from trying to compact these files. should always be the case, assuming that other processes are adhering to the locking protocol. 7. Rename `${min_update_index}-${max_update_index}_XXXXXX` to -`${min_update_index}-${max_update_index}.ref`. +`${min_update_index}-${max_update_index}-${random}.ref`. 8. Write the new stack to `tables.list.lock`, replacing `B` and `C` with the file from (4). 9. Rename `tables.list.lock` to `tables.list`. @@ -1005,6 +999,27 @@ This strategy permits compactions to proceed independently of updates. Each reftable (compacted or not) is uniquely identified by its name, so open reftables can be cached by their name. +Windows +^^^^^^^ + +On windows, and other systems that do not allow deleting or renaming to open +files, compaction may succeed, but other readers may prevent obsolete tables +from being deleted. + +On these platforms, the following strategy can be followed: on closing a +reftable stack, reload `tables.list`, and delete any tables no longer mentioned +in `tables.list`. + +Irregular program exit may still leave about unused files. In this case, a +cleanup operation should proceed as follows: + +* take a lock `tables.list.lock` to prevent concurrent modifications +* refresh the reftable stack, by reading `tables.list` +* for each `*.ref` file, remove it if +** it is not mentioned in `tables.list`, and +** its max update_index is not beyond the max update_index of the stack + + Alternatives considered ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/technical/remembering-renames.txt b/Documentation/technical/remembering-renames.txt new file mode 100644 index 0000000000..2fd5cc88e0 --- /dev/null +++ b/Documentation/technical/remembering-renames.txt @@ -0,0 +1,671 @@ +Rebases and cherry-picks involve a sequence of merges whose results are +recorded as new single-parent commits. The first parent side of those +merges represent the "upstream" side, and often include a far larger set of +changes than the second parent side. Traditionally, the renames on the +first-parent side of that sequence of merges were repeatedly re-detected +for every merge. This file explains why it is safe and effective during +rebases and cherry-picks to remember renames on the upstream side of +history as an optimization, assuming all merges are automatic and clean +(i.e. no conflicts and not interrupted for user input or editing). + +Outline: + + 0. Assumptions + + 1. How rebasing and cherry-picking work + + 2. Why the renames on MERGE_SIDE1 in any given pick are *always* a + superset of the renames on MERGE_SIDE1 for the next pick. + + 3. Why any rename on MERGE_SIDE1 in any given pick is _almost_ always also + a rename on MERGE_SIDE1 for the next pick + + 4. A detailed description of the the counter-examples to #3. + + 5. Why the special cases in #4 are still fully reasonable to use to pair + up files for three-way content merging in the merge machinery, and why + they do not affect the correctness of the merge. + + 6. Interaction with skipping of "irrelevant" renames + + 7. Additional items that need to be cached + + 8. How directory rename detection interacts with the above and why this + optimization is still safe even if merge.directoryRenames is set to + "true". + + +=== 0. Assumptions === + +There are two assumptions that will hold throughout this document: + + * The upstream side where commits are transplanted to is treated as the + first parent side when rebase/cherry-pick call the merge machinery + + * All merges are fully automatic + +and a third that will hold in sections 2-5 for simplicity, that I'll later +address in section 8: + + * No directory renames occur + + +Let me explain more about each assumption and why I include it: + + +The first assumption is merely for the purposes of making this document +clearer; the optimization implementation does not actually depend upon it. +However, the assumption does hold in all cases because it reflects the way +that both rebase and cherry-pick were implemented; and the implementation +of cherry-pick and rebase are not readily changeable for backwards +compatibility reasons (see for example the discussion of the --ours and +--theirs flag in the documentation of `git checkout`, particularly the +comments about how they behave with rebase). The optimization avoids +checking first-parent-ness, though. It checks the conditions that make the +optimization valid instead, so it would still continue working if someone +changed the parent ordering that cherry-pick and rebase use. But making +this assumption does make this document much clearer and prevents me from +having to repeat every example twice. + +If the second assumption is violated, then the optimization simply is +turned off and thus isn't relevant to consider. The second assumption can +also be stated as "there is no interruption for a user to resolve conflicts +or to just further edit or tweak files". While real rebases and +cherry-picks are often interrupted (either because it's an interactive +rebase where the user requested to stop and edit, or because there were +conflicts that the user needs to resolve), the cache of renames is not +stored on disk, and thus is thrown away as soon as the rebase or cherry +pick stops for the user to resolve the operation. + +The third assumption makes sections 2-5 simpler, and allows people to +understand the basics of why this optimization is safe and effective, and +then I can go back and address the specifics in section 8. It is probably +also worth noting that if directory renames do occur, then the default of +merge.directoryRenames being set to "conflict" means that the operation +will stop for users to resolve the conflicts and the cache will be thrown +away, and thus that there won't be an optimization to apply. So, the only +reason we need to address directory renames specifically, is that some +users will have set merge.directoryRenames to "true" to allow the merges to +continue to proceed automatically. The optimization is still safe with +this config setting, but we have to discuss a few more cases to show why; +this discussion is deferred until section 8. + + +=== 1. How rebasing and cherry-picking work === + +Consider the following setup (from the git-rebase manpage): + + A---B---C topic + / + D---E---F---G main + +After rebasing or cherry-picking topic onto main, this will appear as: + + A'--B'--C' topic + / + D---E---F---G main + +The way the commits A', B', and C' are created is through a series of +merges, where rebase or cherry-pick sequentially uses each of the three +A-B-C commits in a special merge operation. Let's label the three commits +in the merge operation as MERGE_BASE, MERGE_SIDE1, and MERGE_SIDE2. For +this picture, the three commits for each of the three merges would be: + +To create A': + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + +To create B': + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + +To create C': + MERGE_BASE: B + MERGE_SIDE1: B' + MERGE_SIDE2: C + +Sometimes, folks are surprised that these three-way merges are done. It +can be useful in understanding these three-way merges to view them in a +slightly different light. For example, in creating C', you can view it as +either: + + * Apply the changes between B & C to B' + * Apply the changes between B & B' to C + +Conceptually the two statements above are the same as a three-way merge of +B, B', and C, at least the parts before you decide to record a commit. + + +=== 2. Why the renames on MERGE_SIDE1 in any given pick are always a === +=== superset of the renames on MERGE_SIDE1 for the next pick. === + +The merge machinery uses the filenames it is fed from MERGE_BASE, +MERGE_SIDE1, and MERGE_SIDE2. It will only move content to a different +filename under one of three conditions: + + * To make both pieces of a conflict available to a user during conflict + resolution (examples: directory/file conflict, add/add type conflict + such as symlink vs. regular file) + + * When MERGE_SIDE1 renames the file. + + * When MERGE_SIDE2 renames the file. + +First, let's remember what commits are involved in the first and second +picks of the cherry-pick or rebase sequence: + +To create A': + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + +To create B': + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + +So, in particular, we need to show that the renames between E and G are a +superset of those between A and A'. + +A' is created by the first merge. A' will only have renames for one of the +three reasons listed above. The first case, a conflict, results in a +situation where the cache is dropped and thus this optimization doesn't +take effect, so we need not consider that case. The third case, a rename +on MERGE_SIDE2 (i.e. from G to A), will show up in A' but it also shows up +in A -- therefore when diffing A and A' that path does not show up as a +rename. The only remaining way for renames to show up in A' is for the +rename to come from MERGE_SIDE1. Therefore, all renames between A and A' +are a subset of those between E and G. Equivalently, all renames between E +and G are a superset of those between A and A'. + + +=== 3. Why any rename on MERGE_SIDE1 in any given pick is _almost_ === +=== always also a rename on MERGE_SIDE1 for the next pick. === + +Let's again look at the first two picks: + +To create A': + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + +To create B': + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + +Now let's look at any given rename from MERGE_SIDE1 of the first pick, i.e. +any given rename from E to G. Let's use the filenames 'oldfile' and +'newfile' for demonstration purposes. That first pick will function as +follows; when the rename is detected, the merge machinery will do a +three-way content merge of the following: + E:oldfile + G:newfile + A:oldfile +and produce a new result: + A':newfile + +Note above that I've assumed that E->A did not rename oldfile. If that +side did rename, then we most likely have a rename/rename(1to2) conflict +that will cause the rebase or cherry-pick operation to halt and drop the +in-memory cache of renames and thus doesn't need to be considered further. +In the special case that E->A does rename the file but also renames it to +newfile, then there is no conflict from the renaming and the merge can +succeed. In this special case, the rename is not valid to cache because +the second merge will find A:newfile in the MERGE_BASE (see also the new +testcases in t6429 with "rename same file identically" in their +description). So a rename/rename(1to1) needs to be specially handled by +pruning renames from the cache and decrementing the dir_rename_counts in +the current and leading directories associated with those renames. Or, +since these are really rare, one could just take the easy way out and +disable the remembering renames optimization when a rename/rename(1to1) +happens. + +The previous paragraph handled the cases for E->A renaming oldfile, let's +continue assuming that oldfile is not renamed in A. + +As per the diagram for creating B', MERGE_SIDE1 involves the changes from A +to A'. So, we are curious whether A:oldfile and A':newfile will be viewed +as renames. Note that: + + * There will be no A':oldfile (because there could not have been a + G:oldfile as we do not do break detection in the merge machinery and + G:newfile was detected as a rename, and by the construction of the + rename above that merged cleanly, the merge machinery will ensure there + is no 'oldfile' in the result). + + * There will be no A:newfile (if there had been, we would have had a + rename/add conflict). + + * Clearly A:oldfile and A':newfile are "related" (A':newfile came from a + clean three-way content merge involving A:oldfile). + +We can also expound on the third point above, by noting that three-way +content merges can also be viewed as applying the differences between the +base and one side to the other side. Thus we can view A':newfile as +having been created by taking the changes between E:oldfile and G:newfile +(which were detected as being related, i.e. <50% changed) to A:oldfile. + +Thus A:oldfile and A':newfile are just as related as E:oldfile and +G:newfile are -- they have exactly identical differences. Since the latter +were detected as renames, A:oldfile and A':newfile should also be +detectable as renames almost always. + + +=== 4. A detailed description of the counter-examples to #3. === + +We already noted in section 3 that rename/rename(1to1) (i.e. both sides +renaming a file the same way) was one counter-example. The more +interesting bit, though, is why did we need to use the "almost" qualifier +when stating that A:oldfile and A':newfile are "almost" always detectable +as renames? + +Let's repeat an earlier point that section 3 made: + + A':newfile was created by applying the changes between E:oldfile and + G:newfile to A:oldfile. The changes between E:oldfile and G:newfile were + <50% of the size of E:oldfile. + +If those changes that were <50% of the size of E:oldfile are also <50% of +the size of A:oldfile, then A:oldfile and A':newfile will be detectable as +renames. However, if there is a dramatic size reduction between E:oldfile +and A:oldfile (but the changes between E:oldfile, G:newfile, and A:oldfile +still somehow merge cleanly), then traditional rename detection would not +detect A:oldfile and A':newfile as renames. + +Here's an example where that can happen: + * E:oldfile had 20 lines + * G:newfile added 10 new lines at the beginning of the file + * A:oldfile kept the first 3 lines of the file, and deleted all the rest +then + => A':newfile would have 13 lines, 3 of which matches those in A:oldfile. +E:oldfile -> G:newfile would be detected as a rename, but A:oldfile and +A':newfile would not be. + + +=== 5. Why the special cases in #4 are still fully reasonable to use to === +=== pair up files for three-way content merging in the merge machinery, === +=== and why they do not affect the correctness of the merge. === + +In the rename/rename(1to1) case, A:newfile and A':newfile are not renames +since they use the *same* filename. However, files with the same filename +are obviously fine to pair up for three-way content merging (the merge +machinery has never employed break detection). The interesting +counter-example case is thus not the rename/rename(1to1) case, but the case +where A did not rename oldfile. That was the case that we spent most of +the time discussing in sections 3 and 4. The remainder of this section +will be devoted to that case as well. + +So, even if A:oldfile and A':newfile aren't detectable as renames, why is +it still reasonable to pair them up for three-way content merging in the +merge machinery? There are multiple reasons: + + * As noted in sections 3 and 4, the diff between A:oldfile and A':newfile + is *exactly* the same as the diff between E:oldfile and G:newfile. The + latter pair were detected as renames, so it seems unlikely to surprise + users for us to treat A:oldfile and A':newfile as renames. + + * In fact, "oldfile" and "newfile" were at one point detected as renames + due to how they were constructed in the E..G chain. And we used that + information once already in this rebase/cherry-pick. I think users + would be unlikely to be surprised at us continuing to treat the files + as renames and would quickly understand why we had done so. + + * Marking or declaring files as renames is *not* the end goal for merges. + Merges use renames to determine which files make sense to be paired up + for three-way content merges. + + * A:oldfile and A':newfile were _already_ paired up in a three-way + content merge; that is how A':newfile was created. In fact, that + three-way content merge was clean. So using them again in a later + three-way content merge seems very reasonable. + +However, the above is focusing on the common scenarios. Let's try to look +at all possible unusual scenarios and compare without the optimization to +with the optimization. Consider the following theoretical cases; we will +then dive into each to determine which of them are possible, +and if so, what they mean: + + 1. Without the optimization, the second merge results in a conflict. + With the optimization, the second merge also results in a conflict. + Questions: Are the conflicts confusingly different? Better in one case? + + 2. Without the optimization, the second merge results in NO conflict. + With the optimization, the second merge also results in NO conflict. + Questions: Are the merges the same? + + 3. Without the optimization, the second merge results in a conflict. + With the optimization, the second merge results in NO conflict. + Questions: Possible? Bug, bugfix, or something else? + + 4. Without the optimization, the second merge results in NO conflict. + With the optimization, the second merge results in a conflict. + Questions: Possible? Bug, bugfix, or something else? + +I'll consider all four cases, but out of order. + +The fourth case is impossible. For the code without the remembering +renames optimization to not get a conflict, B:oldfile would need to exactly +match A:oldfile -- if it doesn't, there would be a modify/delete conflict. +If A:oldfile matches B:oldfile exactly, then a three-way content merge +between A:oldfile, A':newfile, and B:oldfile would have no conflict and +just give us the version of newfile from A' as the result. + +From the same logic as the above paragraph, the second case would indeed +result in identical merges. When A:oldfile exactly matches B:oldfile, an +undetected rename would say, "Oh, I see one side didn't modify 'oldfile' +and the other side deleted it. I'll delete it. And I see you have this +brand new file named 'newfile' in A', so I'll keep it." That gives the +same results as three-way content merging A:oldfile, A':newfile, and +B:oldfile -- a removal of oldfile with the version of newfile from A' +showing up in the result. + +The third case is interesting. It means that A:oldfile and A':newfile were +not just similar enough, but that the changes between them did not conflict +with the changes between A:oldfile and B:oldfile. This would validate our +hunch that the files were similar enough to be used in a three-way content +merge, and thus seems entirely correct for us to have used them that way. +(Sidenote: One particular example here may be enlightening. Let's say that +B was an immediate revert of A. B clearly would have been a clean revert +of A, since A was B's immediate parent. One would assume that if you can +pick a commit, you should also be able to cherry-pick its immediate revert. +However, this is one of those funny corner cases; without this +optimization, we just successfully picked a commit cleanly, but we are +unable to cherry-pick its immediate revert due to the size differences +between E:oldfile and A:oldfile.) + +That leaves only the first case to consider -- when we get conflicts both +with or without the optimization. Without the optimization, we'll have a +modify/delete conflict, where both A':newfile and B:oldfile are left in the +tree for the user to deal with and no hints about the potential similarity +between the two. With the optimization, we'll have a three-way content +merged A:oldfile, A':newfile, and B:oldfile with conflict markers +suggesting we thought the files were related but giving the user the chance +to resolve. As noted above, I don't think users will find us treating +'oldfile' and 'newfile' as related as a surprise since they were between E +and G. In any event, though, this case shouldn't be concerning since we +hit a conflict in both cases, told the user what we know, and asked them to +resolve it. + +So, in summary, case 4 is impossible, case 2 yields the same behavior, and +cases 1 and 3 seem to provide as good or better behavior with the +optimization than without. + + +=== 6. Interaction with skipping of "irrelevant" renames === + +Previous optimizations involved skipping rename detection for paths +considered to be "irrelevant". See for example the following commits: + + * 32a56dfb99 ("merge-ort: precompute subset of sources for which we + need rename detection", 2021-03-11) + * 2fd9eda462 ("merge-ort: precompute whether directory rename + detection is needed", 2021-03-11) + * 9bd342137e ("diffcore-rename: determine which relevant_sources are + no longer relevant", 2021-03-13) + +Relevance is always determined by what the _other_ side of history has +done, in terms of modifing a file that our side renamed, or adding a +file to a directory which our side renamed. This means that a path +that is "irrelevant" when picking the first commit of a series in a +rebase or cherry-pick, may suddenly become "relevant" when picking the +next commit. + +The upshot of this is that we can only cache rename detection results +for relevant paths, and need to re-check relevance in subsequent +commits. If those subsequent commits have additional paths that are +relevant for rename detection, then we will need to redo rename +detection -- though we can limit it to the paths for which we have not +already detected renames. + + +=== 7. Additional items that need to be cached === + +It turns out we have to cache more than just renames; we also cache: + + A) non-renames (i.e. unpaired deletes) + B) counts of renames within directories + C) sources that were marked as RELEVANT_LOCATION, but which were + downgraded to RELEVANT_NO_MORE + D) the toplevel trees involved in the merge + +These are all stored in struct rename_info, and respectively appear in + * cached_pairs (along side actual renames, just with a value of NULL) + * dir_rename_counts + * cached_irrelevant + * merge_trees + +The reason for (A) comes from the irrelevant renames skipping +optimization discussed in section 6. The fact that irrelevant renames +are skipped means we only get a subset of the potential renames +detected and subsequent commits may need to run rename detection on +the upstream side on a subset of the remaining renames (to get the +renames that are relevant for that later commit). Since unpaired +deletes are involved in rename detection too, we don't want to +repeatedly check that those paths remain unpaired on the upstream side +with every commit we are transplanting. + +The reason for (B) is that diffcore_rename_extended() is what +generates the counts of renames by directory which is needed in +directory rename detection, and if we don't run +diffcore_rename_extended() again then we need to have the output from +it, including dir_rename_counts, from the previous run. + +The reason for (C) is that merge-ort's tree traversal will again think +those paths are relevant (marking them as RELEVANT_LOCATION), but the +fact that they were downgraded to RELEVANT_NO_MORE means that +dir_rename_counts already has the information we need for directory +rename detection. (A path which becomes RELEVANT_CONTENT in a +subsequent commit will be removed from cached_irrelevant.) + +The reason for (D) is that is how we determine whether the remember +renames optimization can be used. In particular, remembering that our +sequence of merges looks like: + + Merge 1: + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + => Creates A' + + Merge 2: + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + => Creates B' + +It is the fact that the trees A and A' appear both in Merge 1 and in +Merge 2, with A as a parent of A' that allows this optimization. So +we store the trees to compare with what we are asked to merge next +time. + + +=== 8. How directory rename detection interacts with the above and === +=== why this optimization is still safe even if === +=== merge.directoryRenames is set to "true". === + +As noted in the assumptions section: + + """ + ...if directory renames do occur, then the default of + merge.directoryRenames being set to "conflict" means that the operation + will stop for users to resolve the conflicts and the cache will be + thrown away, and thus that there won't be an optimization to apply. + So, the only reason we need to address directory renames specifically, + is that some users will have set merge.directoryRenames to "true" to + allow the merges to continue to proceed automatically. + """ + +Let's remember that we need to look at how any given pick affects the next +one. So let's again use the first two picks from the diagram in section +one: + + First pick does this three-way merge: + MERGE_BASE: E + MERGE_SIDE1: G + MERGE_SIDE2: A + => creates A' + + Second pick does this three-way merge: + MERGE_BASE: A + MERGE_SIDE1: A' + MERGE_SIDE2: B + => creates B' + +Now, directory rename detection exists so that if one side of history +renames a directory, and the other side adds a new file to the old +directory, then the merge (with merge.directoryRenames=true) can move the +file into the new directory. There are two qualitatively different ways to +add a new file to an old directory: create a new file, or rename a file +into that directory. Also, directory renames can be done on either side of +history, so there are four cases to consider: + + * MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to old dir + * MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames file into old dir + * MERGE_SIDE1 adds new file to old dir, MERGE_SIDE2 renames old dir + * MERGE_SIDE1 renames file into old dir, MERGE_SIDE2 renames old dir + +One last note before we consider these four cases: There are some +important properties about how we implement this optimization with +respect to directory rename detection that we need to bear in mind +while considering all of these cases: + + * rename caching occurs *after* applying directory renames + + * a rename created by directory rename detection is recorded for the side + of history that did the directory rename. + + * dir_rename_counts, the nested map of + {oldname => {newname => count}}, + is cached between runs as well. This basically means that directory + rename detection is also cached, though only on the side of history + that we cache renames for (MERGE_SIDE1 as far as this document is + concerned; see the assumptions section). Two interesting sub-notes + about these counts: + + * If we need to perform rename-detection again on the given side (e.g. + some paths are relevant for rename detection that weren't before), + then we clear dir_rename_counts and recompute it, making use of + cached_pairs. The reason it is important to do this is optimizations + around RELEVANT_LOCATION exist to prevent us from computing + unnecessary renames for directory rename detection and from computing + dir_rename_counts for irrelevant directories; but those same renames + or directories may become necessary for subsequent merges. The + easiest way to "fix up" dir_rename_counts in such cases is to just + recompute it. + + * If we prune rename/rename(1to1) entries from the cache, then we also + need to update dir_rename_counts to decrement the counts for the + involved directory and any relevant parent directories (to undo what + update_dir_rename_counts() in diffcore-rename.c incremented when the + rename was initially found). If we instead just disable the + remembering renames optimization when the exceedingly rare + rename/rename(1to1) cases occur, then dir_rename_counts will get + re-computed the next time rename detection occurs, as noted above. + + * the side with multiple commits to pick, is the side of history that we + do NOT cache renames for. Thus, there are no additional commits to + change the number of renames in a directory, except for those done by + directory rename detection (which always pad the majority). + + * the "renames" we cache are modified slightly by any directory rename, + as noted below. + +Now, with those notes out of the way, let's go through the four cases +in order: + +Case 1: MERGE_SIDE1 renames old dir, MERGE_SIDE2 adds new file to old dir + + This case looks like this: + + MERGE_BASE: E, Has olddir/ + MERGE_SIDE1: G, Renames olddir/ -> newdir/ + MERGE_SIDE2: A, Adds olddir/newfile + => creates A', With newdir/newfile + + MERGE_BASE: A, Has olddir/newfile + MERGE_SIDE1: A', Has newdir/newfile + MERGE_SIDE2: B, Modifies olddir/newfile + => expected B', with threeway-merged newdir/newfile from above + + In this case, with the optimization, note that after the first commit: + * MERGE_SIDE1 remembers olddir/ -> newdir/ + * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile + Given the cached rename noted above, the second merge can proceed as + expected without needing to perform rename detection from A -> A'. + +Case 2: MERGE_SIDE1 renames old dir, MERGE_SIDE2 renames file into old dir + + This case looks like this: + MERGE_BASE: E oldfile, olddir/ + MERGE_SIDE1: G oldfile, olddir/ -> newdir/ + MERGE_SIDE2: A oldfile -> olddir/newfile + => creates A', With newdir/newfile representing original oldfile + + MERGE_BASE: A olddir/newfile + MERGE_SIDE1: A' newdir/newfile + MERGE_SIDE2: B modify olddir/newfile + => expected B', with threeway-merged newdir/newfile from above + + In this case, with the optimization, note that after the first commit: + * MERGE_SIDE1 remembers olddir/ -> newdir/ + * MERGE_SIDE1 has cached olddir/newfile -> newdir/newfile + (NOT oldfile -> newdir/newfile; compare to case with + (p->status == 'R' && new_path) in possibly_cache_new_pair()) + + Given the cached rename noted above, the second merge can proceed as + expected without needing to perform rename detection from A -> A'. + +Case 3: MERGE_SIDE1 adds new file to old dir, MERGE_SIDE2 renames old dir + + This case looks like this: + + MERGE_BASE: E, Has olddir/ + MERGE_SIDE1: G, Adds olddir/newfile + MERGE_SIDE2: A, Renames olddir/ -> newdir/ + => creates A', With newdir/newfile + + MERGE_BASE: A, Has newdir/, but no notion of newdir/newfile + MERGE_SIDE1: A', Has newdir/newfile + MERGE_SIDE2: B, Has newdir/, but no notion of newdir/newfile + => expected B', with newdir/newfile from A' + + In this case, with the optimization, note that after the first commit there + were no renames on MERGE_SIDE1, and any renames on MERGE_SIDE2 are tossed. + But the second merge didn't need any renames so this is fine. + +Case 4: MERGE_SIDE1 renames file into old dir, MERGE_SIDE2 renames old dir + + This case looks like this: + + MERGE_BASE: E, Has olddir/ + MERGE_SIDE1: G, Renames oldfile -> olddir/newfile + MERGE_SIDE2: A, Renames olddir/ -> newdir/ + => creates A', With newdir/newfile representing original oldfile + + MERGE_BASE: A, Has oldfile + MERGE_SIDE1: A', Has newdir/newfile + MERGE_SIDE2: B, Modifies oldfile + => expected B', with threeway-merged newdir/newfile from above + + In this case, with the optimization, note that after the first commit: + * MERGE_SIDE1 remembers oldfile -> newdir/newfile + (NOT oldfile -> olddir/newfile; compare to case of second + block under p->status == 'R' in possibly_cache_new_pair()) + * MERGE_SIDE2 renames are tossed because only MERGE_SIDE1 is remembered + + Given the cached rename noted above, the second merge can proceed as + expected without needing to perform rename detection from A -> A'. + +Finally, I'll just note here that interactions with the +skip-irrelevant-renames optimization means we sometimes don't detect +renames for any files within a directory that was renamed, in which +case we will not have been able to detect any rename for the directory +itself. In such a case, we do not know whether the directory was +renamed; we want to be careful to avoid cacheing some kind of "this +directory was not renamed" statement. If we did, then a subsequent +commit being rebased could add a file to the old directory, and the +user would expect it to end up in the correct directory -- something +our erroneous "this directory was not renamed" cache would preclude. diff --git a/Documentation/technical/sparse-index.txt b/Documentation/technical/sparse-index.txt new file mode 100644 index 0000000000..3b24c1a219 --- /dev/null +++ b/Documentation/technical/sparse-index.txt @@ -0,0 +1,208 @@ +Git Sparse-Index Design Document +================================ + +The sparse-checkout feature allows users to focus a working directory on +a subset of the files at HEAD. The cone mode patterns, enabled by +`core.sparseCheckoutCone`, allow for very fast pattern matching to +discover which files at HEAD belong in the sparse-checkout cone. + +Three important scale dimensions for a Git working directory are: + +* `HEAD`: How many files are present at `HEAD`? + +* Populated: How many files are within the sparse-checkout cone. + +* Modified: How many files has the user modified in the working directory? + +We will use big-O notation -- O(X) -- to denote how expensive certain +operations are in terms of these dimensions. + +These dimensions are ordered by their magnitude: users (typically) modify +fewer files than are populated, and we can only populate files at `HEAD`. + +Problems occur if there is an extreme imbalance in these dimensions. For +example, if `HEAD` contains millions of paths but the populated set has +only tens of thousands, then commands like `git status` and `git add` can +be dominated by operations that require O(`HEAD`) operations instead of +O(Populated). Primarily, the cost is in parsing and rewriting the index, +which is filled primarily with files at `HEAD` that are marked with the +`SKIP_WORKTREE` bit. + +The sparse-index intends to take these commands that read and modify the +index from O(`HEAD`) to O(Populated). To do this, we need to modify the +index format in a significant way: add "sparse directory" entries. + +With cone mode patterns, it is possible to detect when an entire +directory will have its contents outside of the sparse-checkout definition. +Instead of listing all of the files it contains as individual entries, a +sparse-index contains an entry with the directory name, referencing the +object ID of the tree at `HEAD` and marked with the `SKIP_WORKTREE` bit. +If we need to discover the details for paths within that directory, we +can parse trees to find that list. + +At time of writing, sparse-directory entries violate expectations about the +index format and its in-memory data structure. There are many consumers in +the codebase that expect to iterate through all of the index entries and +see only files. In fact, these loops expect to see a reference to every +staged file. One way to handle this is to parse trees to replace a +sparse-directory entry with all of the files within that tree as the index +is loaded. However, parsing trees is slower than parsing the index format, +so that is a slower operation than if we left the index alone. The plan is +to make all of these integrations "sparse aware" so this expansion through +tree parsing is unnecessary and they use fewer resources than when using a +full index. + +The implementation plan below follows four phases to slowly integrate with +the sparse-index. The intention is to incrementally update Git commands to +interact safely with the sparse-index without significant slowdowns. This +may not always be possible, but the hope is that the primary commands that +users need in their daily work are dramatically improved. + +Phase I: Format and initial speedups +------------------------------------ + +During this phase, Git learns to enable the sparse-index and safely parse +one. Protections are put in place so that every consumer of the in-memory +data structure can operate with its current assumption of every file at +`HEAD`. + +At first, every index parse will call a helper method, +`ensure_full_index()`, which scans the index for sparse-directory entries +(pointing to trees) and replaces them with the full list of paths (with +blob contents) by parsing tree objects. This will be slower in all cases. +The only noticeable change in behavior will be that the serialized index +file contains sparse-directory entries. + +To start, we use a new required index extension, `sdir`, to allow +inserting sparse-directory entries into indexes with file format +versions 2, 3, and 4. This prevents Git versions that do not understand +the sparse-index from operating on one, while allowing tools that do not +understand the sparse-index to operate on repositories as long as they do +not interact with the index. A new format, index v5, will be introduced +that includes sparse-directory entries by default. It might also +introduce other features that have been considered for improving the +index, as well. + +Next, consumers of the index will be guarded against operating on a +sparse-index by inserting calls to `ensure_full_index()` or +`expand_index_to_path()`. If a specific path is requested, then those will +be protected from within the `index_file_exists()` and `index_name_pos()` +API calls: they will call `ensure_full_index()` if necessary. The +intention here is to preserve existing behavior when interacting with a +sparse-checkout. We don't want a change to happen by accident, without +tests. Many of these locations may not need any change before removing the +guards, but we should not do so without tests to ensure the expected +behavior happens. + +It may be desirable to _change_ the behavior of some commands in the +presence of a sparse index or more generally in any sparse-checkout +scenario. In such cases, these should be carefully communicated and +tested. No such behavior changes are intended during this phase. + +During a scan of the codebase, not every iteration of the cache entries +needs an `ensure_full_index()` check. The basic reasons include: + +1. The loop is scanning for entries with non-zero stage. These entries + are not collapsed into a sparse-directory entry. + +2. The loop is scanning for submodules. These entries are not collapsed + into a sparse-directory entry. + +3. The loop is part of the index API, especially around reading or + writing the format. + +4. The loop is checking for correct order of cache entries and that is + correct if and only if the sparse-directory entries are in the correct + location. + +5. The loop ignores entries with the `SKIP_WORKTREE` bit set, or is + otherwise already aware of sparse directory entries. + +6. The sparse-index is disabled at this point when using the split-index + feature, so no effort is made to protect the split-index API. + +Even after inserting these guards, we will keep expanding sparse-indexes +for most Git commands using the `command_requires_full_index` repository +setting. This setting will be on by default and disabled one builtin at a +time until we have sufficient confidence that all of the index operations +are properly guarded. + +To complete this phase, the commands `git status` and `git add` will be +integrated with the sparse-index so that they operate with O(Populated) +performance. They will be carefully tested for operations within and +outside the sparse-checkout definition. + +Phase II: Careful integrations +------------------------------ + +This phase focuses on ensuring that all index extensions and APIs work +well with a sparse-index. This requires significant increases to our test +coverage, especially for operations that interact with the working +directory outside of the sparse-checkout definition. Some of these +behaviors may not be the desirable ones, such as some tests already +marked for failure in `t1092-sparse-checkout-compatibility.sh`. + +The index extensions that may require special integrations are: + +* FS Monitor +* Untracked cache + +While integrating with these features, we should look for patterns that +might lead to better APIs for interacting with the index. Coalescing +common usage patterns into an API call can reduce the number of places +where sparse-directories need to be handled carefully. + +Phase III: Important command speedups +------------------------------------- + +At this point, the patterns for testing and implementing sparse-directory +logic should be relatively stable. This phase focuses on updating some of +the most common builtins that use the index to operate as O(Populated). +Here is a potential list of commands that could be valuable to integrate +at this point: + +* `git commit` +* `git checkout` +* `git merge` +* `git rebase` + +Hopefully, commands such as `git merge` and `git rebase` can benefit +instead from merge algorithms that do not use the index as a data +structure, such as the merge-ORT strategy. As these topics mature, we +may enable the ORT strategy by default for repositories using the +sparse-index feature. + +Along with `git status` and `git add`, these commands cover the majority +of users' interactions with the working directory. In addition, we can +integrate with these commands: + +* `git grep` +* `git rm` + +These have been proposed as some whose behavior could change when in a +repo with a sparse-checkout definition. It would be good to include this +behavior automatically when using a sparse-index. Some clarity is needed +to make the behavior switch clear to the user. + +This phase is the first where parallel work might be possible without too +much conflicts between topics. + +Phase IV: The long tail +----------------------- + +This last phase is less a "phase" and more "the new normal" after all of +the previous work. + +To start, the `command_requires_full_index` option could be removed in +favor of expanding only when hitting an API guard. + +There are many Git commands that could use special attention to operate as +O(Populated), while some might be so rare that it is acceptable to leave +them with additional overhead when a sparse-index is present. + +Here are some commands that might be useful to update: + +* `git sparse-checkout set` +* `git am` +* `git clean` +* `git stash` diff --git a/Documentation/user-manual.txt b/Documentation/user-manual.txt index fd480b8645..865074bed4 100644 --- a/Documentation/user-manual.txt +++ b/Documentation/user-manual.txt @@ -1,5 +1,8 @@ = Git User Manual +[preface] +== Introduction + Git is a fast distributed revision control system. This manual is designed to be readable by someone with basic UNIX @@ -2789,7 +2792,7 @@ A fast-forward looks something like this: In some cases it is possible that the new head will *not* actually be a descendant of the old head. For example, the developer may have -realized she made a serious mistake, and decided to backtrack, +realized a serious mistake was made and decided to backtrack, resulting in a situation like: ................................................ @@ -3187,7 +3190,7 @@ that *updated* thing--the old state that you added originally ends up not being pointed to by any commit or tree, so it's now a dangling blob object. -Similarly, when the "recursive" merge strategy runs, and finds that +Similarly, when the "ort" merge strategy runs, and finds that there are criss-cross merges and thus more than one merge base (which is fairly unusual, but it does happen), it will generate one temporary midway tree (or possibly even more, if you had lots of criss-crossing |