diff options
352 files changed, 21311 insertions, 7849 deletions
diff --git a/.gitignore b/.gitignore index 14e2b6bde9..4cb14e0bae 100644 --- a/.gitignore +++ b/.gitignore @@ -158,7 +158,7 @@ /gitk-git/gitk-wish /gitweb/GITWEB-BUILD-OPTIONS /gitweb/gitweb.cgi -/gitweb/gitweb.min.* +/gitweb/static/gitweb.min.* /test-chmtime /test-ctype /test-date @@ -166,12 +166,17 @@ /test-dump-cache-tree /test-genrandom /test-index-version +/test-line-buffer /test-match-trees +/test-obj-pool /test-parse-options /test-path-utils /test-run-command /test-sha1 /test-sigchain +/test-string-pool +/test-svn-fe +/test-treap /common-cmds.h *.tar.gz *.dsc diff --git a/Documentation/Makefile b/Documentation/Makefile index a4c4063e50..e117bc4315 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -279,7 +279,7 @@ $(patsubst %,%.html,$(API_DOCS) technical/api-index): %.html : %.txt XSLT = docbook.xsl XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css -user-manual.html: user-manual.xml +user-manual.html: user-manual.xml $(XSLT) $(QUIET_XSLTPROC)$(RM) $@+ $@ && \ xsltproc $(XSLTOPTS) -o $@+ $(XSLT) $< && \ mv $@+ $@ diff --git a/Documentation/RelNotes-1.5.6.3.txt b/Documentation/RelNotes-1.5.6.3.txt index 942611299d..f61dd3504a 100644 --- a/Documentation/RelNotes-1.5.6.3.txt +++ b/Documentation/RelNotes-1.5.6.3.txt @@ -4,7 +4,7 @@ GIT v1.5.6.3 Release Notes Fixes since v1.5.6.2 -------------------- -* Setting core.sharerepository to traditional "true" value was supposed to make +* Setting core.sharedrepository to traditional "true" value was supposed to make the repository group writable but should not affect permission for others. However, since 1.5.6, it was broken to drop permission for others when umask is 022, making the repository unreadable by others. 
diff --git a/Documentation/RelNotes-1.6.0.2.txt b/Documentation/RelNotes-1.6.0.2.txt index 51b32f5d94..e1e24b3295 100644 --- a/Documentation/RelNotes-1.6.0.2.txt +++ b/Documentation/RelNotes-1.6.0.2.txt @@ -17,7 +17,7 @@ Fixes since v1.6.0.1 * Many commands did not use the correct working tree location when used with GIT_WORK_TREE environment settings. -* Some systems needs to use compatibility fnmach and regex libraries +* Some systems need to use compatibility fnmatch and regex libraries independent from each other; the compat/ area has been reorganized to allow this. diff --git a/Documentation/RelNotes-1.6.4.3.txt b/Documentation/RelNotes-1.6.4.3.txt index 4f29babdeb..5643e6537d 100644 --- a/Documentation/RelNotes-1.6.4.3.txt +++ b/Documentation/RelNotes-1.6.4.3.txt @@ -11,7 +11,7 @@ Fixes since v1.6.4.2 been deprecated. * "git fetch" and "git clone" had an extra sanity check to verify the - presense of the corresponding *.pack file before downloading *.idx + presence of the corresponding *.pack file before downloading *.idx file by issuing a HEAD request. Github server however sometimes gave 500 (Internal server error) response to HEAD even if a GET request for *.pack file to the same URL would have succeeded, and broke diff --git a/Documentation/RelNotes-1.6.5.4.txt b/Documentation/RelNotes-1.6.5.4.txt index e42f8b2397..d3a2a3e712 100644 --- a/Documentation/RelNotes-1.6.5.4.txt +++ b/Documentation/RelNotes-1.6.5.4.txt @@ -26,7 +26,7 @@ Fixes since v1.6.5.3 future versions, but not in this release, * "git merge -m <message> <branch>..." added the standard merge message - on its own after user-supplied message, which should have overrided the + on its own after user-supplied message, which should have overridden the standard one. Other minor documentation updates are included. 
diff --git a/Documentation/RelNotes-1.6.5.7.txt b/Documentation/RelNotes-1.6.5.7.txt index 5b49ea53be..dc5302c21c 100644 --- a/Documentation/RelNotes-1.6.5.7.txt +++ b/Documentation/RelNotes-1.6.5.7.txt @@ -10,7 +10,7 @@ Fixes since v1.6.5.6 an older version of git should just ignore them. Instead we diagnosed it as an error. -* With help.autocorrect set to non-zero value, the logic to guess typoes +* With help.autocorrect set to non-zero value, the logic to guess typos in the subcommand name misfired and ran a random nonsense command. * If a command is run with an absolute path as a pathspec inside a bare diff --git a/Documentation/RelNotes-1.6.6.txt b/Documentation/RelNotes-1.6.6.txt index 04e205c457..c50b59c495 100644 --- a/Documentation/RelNotes-1.6.6.txt +++ b/Documentation/RelNotes-1.6.6.txt @@ -29,7 +29,7 @@ or adjust to the new behaviour, on the day their sysadmin decides to install the new version of git. When we switched from "git-foo" to "git foo" in 1.6.0, even though the change had been advertised and the transition guide had been provided for a very long time, the users procrastinated -during the entire transtion period, and ended up panicking on the day +during the entire transition period, and ended up panicking on the day their sysadmins updated their git installation. We are trying to avoid repeating that unpleasantness in the 1.7.0 release. @@ -94,7 +94,7 @@ users will fare this time. * "git diff" traditionally treated various "ignore whitespace" options only as a way to filter the patch output. "git diff --exit-code -b" exited with non-zero status even if all changes were about changing the - ammount of whitespace and nothing else. and "git diff -b" showed the + amount of whitespace and nothing else. and "git diff -b" showed the "diff --git" header line for such a change without patch text. 
In 1.7.0, the "ignore whitespaces" will affect the semantics of the diff --git a/Documentation/RelNotes-1.7.0.7.txt b/Documentation/RelNotes-1.7.0.7.txt new file mode 100644 index 0000000000..d0cb7ca7e2 --- /dev/null +++ b/Documentation/RelNotes-1.7.0.7.txt @@ -0,0 +1,16 @@ +Git v1.7.0.7 Release Notes +========================== + +Fixes since v1.7.0.6 +-------------------- + + * "make NO_CURL=NoThanks install" was broken. + + * An overlong line after ".gitdir: " in a git file caused out of bounds + access to an array on the stack. + + * "git config --path conf.var" to attempt to expand a variable conf.var + that uses "~/" short-hand segfaulted when $HOME environment variable + was not set. + +And other minor fixes and documentation updates. diff --git a/Documentation/RelNotes-1.7.0.txt b/Documentation/RelNotes-1.7.0.txt index 43e3f33615..0bb8c0b2a2 100644 --- a/Documentation/RelNotes-1.7.0.txt +++ b/Documentation/RelNotes-1.7.0.txt @@ -202,7 +202,7 @@ release, unless otherwise noted. the branch is fully merged to its upstream branch if it is not merged to the current branch. It now deletes it in such a case. - * "fiter-branch" command incorrectly said --prune-empty and --filter-commit + * "filter-branch" command incorrectly said --prune-empty and --filter-commit were incompatible; the latter should be read as --commit-filter. * When using "git status" or asking "git diff" to compare the work tree diff --git a/Documentation/RelNotes-1.7.1.2.txt b/Documentation/RelNotes-1.7.1.2.txt index 46b6a960c7..61ba14e262 100644 --- a/Documentation/RelNotes-1.7.1.2.txt +++ b/Documentation/RelNotes-1.7.1.2.txt @@ -17,3 +17,12 @@ Fixes since v1.7.1.1 * "git rev-parse --parseopt --stop-at-non-option" did not stop at non option when --keep-dashdash was in effect. + + * An overlong line after ".gitdir: " in a git file caused out of bounds + access to an array on the stack. 
+ + * "git config --path conf.var" to attempt to expand a variable conf.var + that uses "~/" short-hand segfaulted when $HOME environment variable + was not set. + +And other minor fixes and documentation updates. diff --git a/Documentation/RelNotes-1.7.2.1.txt b/Documentation/RelNotes-1.7.2.1.txt new file mode 100644 index 0000000000..1103c47a4f --- /dev/null +++ b/Documentation/RelNotes-1.7.2.1.txt @@ -0,0 +1,25 @@ +Git v1.7.2.1 Release Notes +========================== + +Fixes since v1.7.2 +------------------ + + * "git instaweb" wasn't useful when your Apache was installed under a + name other than apache2 (e.g. "httpd"). + + * Similarly, "git web--browse" (invoked by "git help -w") learned that + chrome browser is sometimes called google-chrome. + + * An overlong line after ".gitdir: " in a git file caused out of bounds + access to an array on the stack. + + * "git config --path conf.var" to attempt to expand a variable conf.var + that uses "~/" short-hand segfaulted when $HOME environment variable + was not set. + + * Documentation on Cygwin failed to build. + + * The error message from "git pull blarg" when 'blarg' is an unknown + remote name has been improved. + +And other minor fixes and documentation updates. diff --git a/Documentation/RelNotes-1.7.2.2.txt b/Documentation/RelNotes-1.7.2.2.txt new file mode 100644 index 0000000000..71eb6a8b0a --- /dev/null +++ b/Documentation/RelNotes-1.7.2.2.txt @@ -0,0 +1,22 @@ +Git v1.7.2.2 Release Notes +========================== + +Fixes since v1.7.2.1 +-------------------- + + * Object transfer over smart http transport deadlocked the client when + the remote HTTP server returned a failure, instead of erroring it out. 
+ + * git-gui honors custom textconv filters when showing diff and blame; + + * git diff --relative=subdir (without the necessary trailing /) did not + work well; + + * "git diff-files -p --submodule" was recently broken; + + * "git checkout -b n ':/token'" did not work; + + * "git index-pack" (hence "git fetch/clone/pull/push") enabled the object + replacement machinery by mistake (it never should have); + +And other minor fixes and documentation updates. diff --git a/Documentation/RelNotes-1.7.2.3.txt b/Documentation/RelNotes-1.7.2.3.txt new file mode 100644 index 0000000000..610960cfe1 --- /dev/null +++ b/Documentation/RelNotes-1.7.2.3.txt @@ -0,0 +1,39 @@ +Git v1.7.2.3 Release Notes +========================== + +Fixes since v1.7.2.2 +-------------------- + + * When people try insane things such as delta-compressing 4GiB files, we + threw an assertion failure. + + * "git archive" gave the full commit ID for "$Format:%h$". + + * "git fetch --tags" did not fetch tags when remote.<nick>.tagopt was set + to --no-tags. The command line option now overrides the configuration + setting. + + * "git for-each-ref --format='%(objectname:short)'" has been completely + broken for a long time. + + * "git gc" incorrectly pruned a rerere record that was created long + time ago but still is actively and repeatedly used. + + * "git log --follow -M -p" was seriously broken in 1.7.2, reporting + assertion failure. + + * Running "git log" with an incorrect option started pager nevertheless, + forcing the user to dismiss it. + + * "git rebase" did not work well when the user has diff.renames + configuration variable set. + + * An earlier (and rather old) fix to "git rebase" against a rebased + upstream broke a more normal, non rebased upstream case rather badly, + attempting to re-apply patches that are already accepted upstream. + + * "git submodule sync" forgot to update the superproject's config file + when submodule URL changed. 
+ + * "git pack-refs --all --prune" did not remove a directory that has + become empty. diff --git a/Documentation/RelNotes-1.7.3.txt b/Documentation/RelNotes-1.7.3.txt new file mode 100644 index 0000000000..3512bbb238 --- /dev/null +++ b/Documentation/RelNotes-1.7.3.txt @@ -0,0 +1,73 @@ +Git v1.7.3 Release Notes (draft) +================================ + +Updates since v1.7.2 +-------------------- + + * git-gui got various updates and a new maintainer, Pat Thoyts. + + * Gitweb allows its configuration to change per each request; it used to + read the configuration once upon startup. + + * When git finds a corrupt object, it now reports the file that contains + it. + + * "git checkout -B <it>" is a shorter way to say "git branch -f <it>" + followed by "git checkout <it>". + + * When "git checkout" or "git merge" refuse to proceed in order to + protect local modification to your working tree, they used to stop + after showing just one path that might be lost. They now show all, + in a format that is easier to read. + + * "git clean" learned "-e" ("--exclude") option. + + * Hunk headers produced for C# files by "git diff" and friends show more + relevant context than before. + + * diff.ignoresubmodules configuration variable can be used to squelch the + differences in submodules reported when running commands (e.g. "diff", + "status", etc.) at the superproject level. + + * http.useragent configuration can be used to lie who you are to your + restrictive firewall. + + * "git rebase --strategy <s>" learned "-X" option to pass extra options + that are understood by the chosen merge strategy. + + * "git rebase -i" learned "exec" that you can insert into the insn sheet + to run a command between its steps. + + * "git rebase" between branches that have many binary changes that do + not conflict should be faster. + + * "git rebase -i" peeks into rebase.autosquash configuration and acts as + if you gave --autosquash from the command line. 
+ + +Also contains various documentation updates. + + +Fixes since v1.7.2 +------------------ + +All of the fixes in v1.7.2.X maintenance series are included in this +release, unless otherwise noted. + + * "git merge -s recursive" (which is the default) did not handle cases + where a directory becomes a file (or vice versa) very well. + + * "git fetch" and friends were accidentally broken for url with "+" in + its path, e.g. "git://git.gnome.org/gtk+". + +--- +exec >/var/tmp/1 +echo O=$(git describe master) +O=v1.7.2.2-268-g7e42332 +O=v1.7.2 +git shortlog --no-merges $O..master ^maint +exit 0 + +What did we want to do with... + +1e3d411 (Enable custom schemes for column colors in the graph API, 2010-07-13) diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index eb53e0636e..ece3c77482 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -7,17 +7,16 @@ Checklist (and a short version for the impatient): before committing - do not check in commented out code or unneeded files - the first line of the commit message should be a short - description and should skip the full stop + description (50 characters is the soft limit, see DISCUSSION + in git-commit(1)), and should skip the full stop - the body should provide a meaningful commit message, which: - uses the imperative, present tense: "change", not "changed" or "changes". 
- includes motivation for the change, and contrasts its implementation with previous behaviour - - if you want your work included in git.git, add a - "Signed-off-by: Your Name <you@example.com>" line to the - commit message (or just use the option "-s" when - committing) to confirm that you agree to the Developer's - Certificate of Origin + - add a "Signed-off-by: Your Name <you@example.com>" line to the + commit message (or just use the option "-s" when committing) + to confirm that you agree to the Developer's Certificate of Origin - make sure that you have tests for the bug you are fixing - make sure that the test suite passes after your commit diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf index 87a90f2c3f..aea8627be0 100644 --- a/Documentation/asciidoc.conf +++ b/Documentation/asciidoc.conf @@ -16,8 +16,11 @@ plus=+ caret=^ startsb=[ endsb=] +backslash=\ tilde=~ +apostrophe=' backtick=` +litdd=-- ifdef::backend-docbook[] [linkgit-inlinemacro] diff --git a/Documentation/config.txt b/Documentation/config.txt index e75434b3ef..0510ac795c 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -128,7 +128,7 @@ advice.*:: when writing commit messages. Default: true. commitBeforeMerge:: Advice shown when linkgit:git-merge[1] refuses to - merge to avoid overwritting local changes. + merge to avoid overwriting local changes. Default: true. resolveConflict:: Advices shown by various commands when conflicts @@ -418,7 +418,7 @@ Common unit suffixes of 'k', 'm', or 'g' are supported. core.deltaBaseCacheLimit:: Maximum number of bytes to reserve for caching base objects - that multiple deltafied objects reference. By storing the + that may be referenced by multiple deltified objects. By storing the entire decompressed base objects in a cache Git is able to avoid unpacking and decompressing frequently used base objects multiple times. @@ -563,7 +563,7 @@ not necessarily be the current directory. 
am.keepcr:: If true, git-am will call git-mailsplit for patches in mbox format with parameter '--keep-cr'. In this case git-mailsplit will - not remove `\r` from lines ending with `\r\n`. Can be overrriden + not remove `\r` from lines ending with `\r\n`. Can be overridden by giving '--no-keep-cr' from the command line. See linkgit:git-am[1], linkgit:git-mailsplit[1]. @@ -826,6 +826,11 @@ diff.renames:: will enable basic rename detection. If set to "copies" or "copy", it will detect copies, as well. +diff.ignoreSubmodules:: + Sets the default value of --ignore-submodules. Note that this + affects only 'git diff' Porcelain, and not lower level 'diff' + commands such as 'git diff-files'. + diff.suppressBlankEmpty:: A boolean to inhibit the standard behavior of printing a space before each empty output line. Defaults to false. @@ -1002,7 +1007,7 @@ gitcvs.usecrlfattr:: If true, the server will look up the end-of-line conversion attributes for files to determine the '-k' modes to use. If the attributes force git to treat a file as text, - the '-k' mode will be left blank so cvs clients will + the '-k' mode will be left blank so CVS clients will treat it as text. If they suppress text conversion, the file will be set with '-kb' mode, which suppresses any newline munging the client might otherwise do. If the attributes do not allow @@ -1243,6 +1248,15 @@ http.noEPSV:: support EPSV mode. Can be overridden by the 'GIT_CURL_FTP_NO_EPSV' environment variable. Default is false (curl will use EPSV). +http.useragent:: + The HTTP USER_AGENT string presented to an HTTP server. The default + value represents the version of the client git such as git/1.7.1. + This option allows you to override this value to a more common value + such as Mozilla/4.0. This may be necessary, for instance, if + connecting through a firewall that restricts HTTP connections to a set + of common USER_AGENT strings (but not including those like git/1.7.1). 
+ Can be overridden by the 'GIT_HTTP_USER_AGENT' environment variable. + i18n.commitEncoding:: Character encoding the commit messages are stored in; git itself does not care per se, but this information is necessary e.g. when @@ -1275,7 +1289,9 @@ instaweb.local:: be bound to the local IP (127.0.0.1). instaweb.modulepath:: - The module path for an apache httpd used by linkgit:git-instaweb[1]. + The default module path for linkgit:git-instaweb[1] to use + instead of /usr/lib/apache2/modules. Only used if httpd + is Apache. instaweb.port:: The port number to bind the gitweb httpd to. See @@ -1289,10 +1305,11 @@ interactive.singlekey:: ignored if portable keystroke input is not available. log.date:: - Set default date-time mode for the log command. Setting log.date - value is similar to using 'git log'\'s --date option. The value is one of the - following alternatives: {relative,local,default,iso,rfc,short}. - See linkgit:git-log[1]. + Set the default date-time mode for the 'log' command. + Setting a value for log.date is similar to using 'git log''s + `\--date` option. Possible values are `relative`, `local`, + `default`, `iso`, `rfc`, and `short`; see linkgit:git-log[1] + for details. log.decorate:: Print out the ref names of any commits that are shown by the log @@ -1533,6 +1550,9 @@ rebase.stat:: Whether to show a diffstat of what changed upstream since the last rebase. False by default. +rebase.autosquash:: + If set to true enable '--autosquash' option by default. + receive.autogc:: By default, git-receive-pack will run "git-gc --auto" after receiving data from git-push and updating refs. You can stop @@ -1558,6 +1578,10 @@ receive.denyDeletes:: If set to true, git-receive-pack will deny a ref update that deletes the ref. Use this to prevent such a ref deletion via a push. +receive.denyDeleteCurrent:: + If set to true, git-receive-pack will deny a ref update that + deletes the currently checked out branch of a non-bare repository. 
+ receive.denyCurrentBranch:: If set to true or "refuse", git-receive-pack will deny a ref update to the currently checked out branch of a non-bare repository. @@ -1623,7 +1647,9 @@ remote.<name>.tagopt:: Setting this value to \--no-tags disables automatic tag following when fetching from remote <name>. Setting it to \--tags will fetch every tag from remote <name>, even if they are not reachable from remote - branch heads. + branch heads. Passing these flags directly to linkgit:git-fetch[1] can + override this setting. See options \--tags and \--no-tags of + linkgit:git-fetch[1]. remote.<name>.vcs:: Setting this to a value <vcs> will cause git to interact with @@ -1743,6 +1769,19 @@ submodule.<name>.update:: URL and other values found in the `.gitmodules` file. See linkgit:git-submodule[1] and linkgit:gitmodules[5] for details. +submodule.<name>.ignore:: + Defines under what circumstances "git status" and the diff family show + a submodule as modified. When set to "all", it will never be considered + modified, "dirty" will ignore all changes to the submodules work tree and + takes only differences between the HEAD of the submodule and the commit + recorded in the superproject into account. "untracked" will additionally + let submodules with modified tracked files in their work tree show up. + Using "none" (the default when this option is not set) also shows + submodules that have untracked files in their work tree as changed. + This setting overrides any setting made in .gitmodules for this submodule, + both settings can be overridden on the command line by using the + "--ignore-submodules" option. + tar.umask:: This variable can be used to restrict the permission bits of tar archive entries. 
The default is 0002, which turns off the diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 2371262b10..4656a97e60 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -206,10 +206,29 @@ endif::git-format-patch[] the diff-patch output format. Non default number of digits can be specified with `--abbrev=<n>`. --B:: - Break complete rewrite changes into pairs of delete and create. - --M:: +-B[<n>][/<m>]:: + Break complete rewrite changes into pairs of delete and + create. This serves two purposes: ++ +It affects the way a change that amounts to a total rewrite of a file is shown: +not as a series of deletion and insertion mixed together with a very +few lines that happen to match textually as the context, but as a +single deletion of everything old followed by a single insertion of +everything new, and the number `m` controls this aspect of the -B +option (defaults to 60%). `-B/70%` specifies that less than 30% of the +original should remain in the result for git to consider it a total +rewrite (i.e. otherwise the resulting patch will be a series of +deletion and insertion mixed together with context lines). ++ +When used with -M, a totally-rewritten file is also considered as the +source of a rename (usually -M only considers a file that disappeared +as the source of a rename), and the number `n` controls this aspect of +the -B option (defaults to 50%). `-B20%` specifies that a change with +addition and deletion compared to 20% or more of the file's size is +eligible for being picked up as a possible source of a rename to +another file. + +-M[<n>]:: ifndef::git-log[] Detect renames. endif::git-log[] @@ -218,9 +237,15 @@ ifdef::git-log[] For following files across renames while traversing history, see `--follow`. endif::git-log[] + If `n` is specified, it is a threshold on the similarity + index (i.e. amount of addition/deletions compared to the + file's size).
For example, `-M90%` means git should consider a + delete/add pair to be a rename if more than 90% of the file + hasn't changed. --C:: +-C[<n>]:: Detect copies as well as renames. See also `--find-copies-harder`. + If `n` is specified, it has the same meaning as for `-M<n>`. ifndef::git-format-patch[] --diff-filter=[ACDMRTUXB*]:: @@ -330,7 +355,11 @@ endif::git-format-patch[] --ignore-submodules[=<when>]:: Ignore changes to submodules in the diff generation. <when> can be - either "untracked", "dirty" or "all", which is the default. When + either "none", "untracked", "dirty" or "all", which is the default. + Using "none" will consider the submodule modified when it either contains + untracked or modified files or its HEAD differs from the commit recorded + in the superproject and can be used to override any settings of the + 'ignore' option in linkgit:git-config[1] or linkgit:gitmodules[5]. When "untracked" is used submodules are not considered dirty when they only contain untracked content (but they are still scanned for modified content).
Using "dirty" ignores all changes to the work tree of submodules, diff --git a/Documentation/docbook.xsl b/Documentation/docbook.xsl index 9a6912c641..da8b05b922 100644 --- a/Documentation/docbook.xsl +++ b/Documentation/docbook.xsl @@ -1,5 +1,8 @@ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version='1.0'> <xsl:import href="http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl"/> - <xsl:output method="html" encoding="UTF-8" indent="no" /> + <xsl:output method="html" + encoding="UTF-8" indent="no" + doctype-public="-//W3C//DTD HTML 4.01//EN" + doctype-system="http://www.w3.org/TR/html4/strict.dtd" /> </xsl:stylesheet> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt index 9333c42c55..470ac31396 100644 --- a/Documentation/fetch-options.txt +++ b/Documentation/fetch-options.txt @@ -49,7 +49,9 @@ ifndef::git-pull[] endif::git-pull[] By default, tags that point at objects that are downloaded from the remote repository are fetched and stored locally. - This option disables this automatic tag following. + This option disables this automatic tag following. The default + behavior for a remote may be specified with the remote.<name>.tagopt + setting. See linkgit:git-config[1]. -t:: --tags:: @@ -58,7 +60,9 @@ endif::git-pull[] objects reachable from the branch heads that are being tracked will not be fetched by this mechanism. This flag lets all tags and their associated objects be - downloaded. + downloaded. The default behavior for a remote may be + specified with the remote.<name>.tagopt setting. See + linkgit:git-config[1]. -u:: --update-head-ok:: diff --git a/Documentation/git-add.txt b/Documentation/git-add.txt index e22a62f065..73378b2bef 100644 --- a/Documentation/git-add.txt +++ b/Documentation/git-add.txt @@ -157,14 +157,14 @@ those in info/exclude. See linkgit:gitrepository-layout[5]. 
EXAMPLES -------- -* Adds content from all `\*.txt` files under `Documentation` directory +* Adds content from all `*.txt` files under `Documentation` directory and its subdirectories: + ------------ $ git add Documentation/\*.txt ------------ + -Note that the asterisk `\*` is quoted from the shell in this +Note that the asterisk `*` is quoted from the shell in this example; this lets the command include the files from subdirectories of `Documentation/` directory. @@ -220,7 +220,7 @@ binary so line count cannot be shown) and there is no difference between indexed copy and the working tree version (if the working tree version were also different, 'binary' would have been shown in place of 'nothing'). The -other file, git-add--interactive.perl, has 403 lines added +other file, git-add{litdd}interactive.perl, has 403 lines added and 35 lines deleted if you commit what is in the index, but working tree file has further modifications (one addition and one deletion). diff --git a/Documentation/git-apply.txt b/Documentation/git-apply.txt index 8463439ac5..4a74b23d40 100644 --- a/Documentation/git-apply.txt +++ b/Documentation/git-apply.txt @@ -26,6 +26,10 @@ with the `--cache` option the patch is only applied to the index. Without these options, the command applies the patch only to files, and does not require them to be in a git repository. +This command applies the patch but does not create a commit. Use +linkgit:git-am[1] to create commits from patches generated by +linkgit:git-format-patch[1] and/or received by email. + OPTIONS ------- <patch>...:: @@ -242,6 +246,12 @@ If `--index` is not specified, then the submodule commits in the patch are ignored and only the absence or presence of the corresponding subdirectory is checked and (if possible) updated. + +SEE ALSO +-------- +linkgit:git-am[1]. 
+ + Author ------ Written by Linus Torvalds <torvalds@osdl.org> diff --git a/Documentation/git-archimport.txt b/Documentation/git-archimport.txt index 4d4325f222..4f358c8d6c 100644 --- a/Documentation/git-archimport.txt +++ b/Documentation/git-archimport.txt @@ -44,7 +44,7 @@ archives that it imports, it is also possible to specify git branch names manually. To do so, write a git branch name after each <archive/branch> parameter, separated by a colon. This way, you can shorten the Arch branch names and convert Arch jargon to git jargon, for example mapping a -"PROJECT--devo--VERSION" branch to "master". +"PROJECT{litdd}devo{litdd}VERSION" branch to "master". Associating multiple Arch branches to one git branch is possible; the result will make the most sense only if no commits are made to the first @@ -85,8 +85,8 @@ OPTIONS -o:: Use this for compatibility with old-style branch names used by earlier versions of 'git archimport'. Old-style branch names - were category--branch, whereas new-style branch names are - archive,category--branch--version. In both cases, names given + were category{litdd}branch, whereas new-style branch names are + archive,category{litdd}branch{litdd}version. In both cases, names given on the command-line will override the automatically-generated ones. diff --git a/Documentation/git-bisect-lk2009.txt b/Documentation/git-bisect-lk2009.txt index 86b3015c13..8a2ba37904 100644 --- a/Documentation/git-bisect-lk2009.txt +++ b/Documentation/git-bisect-lk2009.txt @@ -873,7 +873,7 @@ c * N * T + b * M * log2(M) tests where c is the number of rounds of test (so a small constant) and b is the ratio of bug per commit (hopefully a small constant too). -So of course it's much better as it's O(N \* T) vs O(N \* T \* M) if +So of course it's much better as it's O(N * T) vs O(N * T * M) if you would test everything after each commit. This means that test suites are good to prevent some bugs from being @@ -971,7 +971,7 @@ logical change in each commit. 
The smaller the changes in your commit, the most effective "git bisect" will be. And you will probably need "git bisect" less in the first place, as small changes are easier to review even if they are -only reviewed by the commiter. +only reviewed by the committer. Another good idea is to have good commit messages. They can be very helpful to understand why some changes were made. diff --git a/Documentation/git-bundle.txt b/Documentation/git-bundle.txt index a5ed8fb05b..38e59afb34 100644 --- a/Documentation/git-bundle.txt +++ b/Documentation/git-bundle.txt @@ -9,7 +9,7 @@ git-bundle - Move objects and refs by archive SYNOPSIS -------- [verse] -'git bundle' create <file> <git-rev-list args> +'git bundle' create <file> <git-rev-list-args> 'git bundle' verify <file> 'git bundle' list-heads <file> [refname...] 'git bundle' unbundle <file> [refname...] @@ -34,57 +34,58 @@ OPTIONS ------- create <file>:: - Used to create a bundle named 'file'. This requires the - 'git rev-list' arguments to define the bundle contents. + Used to create a bundle named 'file'. This requires the + 'git-rev-list-args' arguments to define the bundle contents. verify <file>:: - Used to check that a bundle file is valid and will apply - cleanly to the current repository. This includes checks on the - bundle format itself as well as checking that the prerequisite - commits exist and are fully linked in the current repository. - 'git bundle' prints a list of missing commits, if any, and exits - with a non-zero status. + Used to check that a bundle file is valid and will apply + cleanly to the current repository. This includes checks on the + bundle format itself as well as checking that the prerequisite + commits exist and are fully linked in the current repository. + 'git bundle' prints a list of missing commits, if any, and exits + with a non-zero status. list-heads <file>:: - Lists the references defined in the bundle. 
If followed by a - list of references, only references matching those given are - printed out. + Lists the references defined in the bundle. If followed by a + list of references, only references matching those given are + printed out. unbundle <file>:: - Passes the objects in the bundle to 'git index-pack' - for storage in the repository, then prints the names of all - defined references. If a list of references is given, only - references matching those in the list are printed. This command is - really plumbing, intended to be called only by 'git fetch'. - -[git-rev-list-args...]:: - A list of arguments, acceptable to 'git rev-parse' and - 'git rev-list', that specifies the specific objects and references - to transport. For example, `master\~10..master` causes the - current master reference to be packaged along with all objects - added since its 10th ancestor commit. There is no explicit - limit to the number of references and objects that may be - packaged. + Passes the objects in the bundle to 'git index-pack' + for storage in the repository, then prints the names of all + defined references. If a list of references is given, only + references matching those in the list are printed. This command is + really plumbing, intended to be called only by 'git fetch'. + +<git-rev-list-args>:: + A list of arguments, acceptable to 'git rev-parse' and + 'git rev-list' (and containing a named ref, see SPECIFYING REFERENCES + below), that specifies the specific objects and references + to transport. For example, `master{tilde}10..master` causes the + current master reference to be packaged along with all objects + added since its 10th ancestor commit. There is no explicit + limit to the number of references and objects that may be + packaged. [refname...]:: - A list of references used to limit the references reported as - available.
This is principally of use to 'git fetch', which - expects to receive only those references asked for and not - necessarily everything in the pack (in this case, 'git bundle' acts - like 'git fetch-pack'). + A list of references used to limit the references reported as + available. This is principally of use to 'git fetch', which + expects to receive only those references asked for and not + necessarily everything in the pack (in this case, 'git bundle' acts + like 'git fetch-pack'). SPECIFYING REFERENCES --------------------- 'git bundle' will only package references that are shown by 'git show-ref': this includes heads, tags, and remote heads. References -such as `master\~1` cannot be packaged, but are perfectly suitable for +such as `master{tilde}1` cannot be packaged, but are perfectly suitable for defining the basis. More than one reference may be packaged, and more than one basis can be specified. The objects packaged are those not contained in the union of the given bases. Each basis can be -specified explicitly (e.g. `^master\~10`), or implicitly (e.g. -`master\~10..master`, `--since=10.days.ago master`). +specified explicitly (e.g. `^master{tilde}10`), or implicitly (e.g. +`master{tilde}10..master`, `--since=10.days.ago master`). It is very important that the basis used be held by the destination. It is okay to err on the side of caution, causing the bundle file @@ -154,7 +155,7 @@ machineB$ git pull If you know up to what commit the intended recipient repository should have the necessary objects, you can use that knowledge to specify the basis, giving a cut-off point to limit the revisions and objects that go -in the resulting bundle. The previous example used lastR2bundle tag +in the resulting bundle. The previous example used the lastR2bundle tag for this purpose, but you can use any other options that you would give to the linkgit:git-log[1] command. 
Here are more examples: @@ -194,7 +195,7 @@ references when fetching: $ git fetch mybundle master:localRef ---------------- -You can also see what references it offers. +You can also see what references it offers: ---------------- $ git ls-remote mybundle diff --git a/Documentation/git-checkout-index.txt b/Documentation/git-checkout-index.txt index d6aa6e14eb..62f9ab24c9 100644 --- a/Documentation/git-checkout-index.txt +++ b/Documentation/git-checkout-index.txt @@ -13,7 +13,7 @@ SYNOPSIS [--stage=<number>|all] [--temp] [-z] [--stdin] - [--] [<file>]\* + [--] [<file>]* DESCRIPTION ----------- diff --git a/Documentation/git-checkout.txt b/Documentation/git-checkout.txt index 1bacd2e104..f88e9977d1 100644 --- a/Documentation/git-checkout.txt +++ b/Documentation/git-checkout.txt @@ -9,7 +9,7 @@ SYNOPSIS -------- [verse] 'git checkout' [-q] [-f] [-m] [<branch>] -'git checkout' [-q] [-f] [-m] [[-b|--orphan] <new_branch>] [<start_point>] +'git checkout' [-q] [-f] [-m] [[-b|-B|--orphan] <new_branch>] [<start_point>] 'git checkout' [-f|--ours|--theirs|-m|--conflict=<style>] [<tree-ish>] [--] <paths>... 'git checkout' --patch [<tree-ish>] [--] [<paths>...] @@ -21,7 +21,7 @@ also update `HEAD` to set the specified branch as the current branch. 'git checkout' [<branch>]:: -'git checkout' -b <new branch> [<start point>]:: +'git checkout' -b|-B <new_branch> [<start point>]:: This form switches branches by updating the index, working tree, and HEAD to reflect the specified branch. @@ -31,6 +31,17 @@ were called and then checked out; in this case you can use the `--track` or `--no-track` options, which will be passed to 'git branch'. As a convenience, `--track` without `-b` implies branch creation; see the description of `--track` below. ++ +If `-B` is given, <new_branch> is created if it doesn't exist; otherwise, it +is reset. 
This is the transactional equivalent of ++ +------------ +$ git branch -f <branch> [<start point>] +$ git checkout <branch> +------------ ++ +that is to say, the branch is not reset/created unless "git checkout" is +successful. 'git checkout' [--patch] [<tree-ish>] [--] <pathspec>...:: @@ -75,6 +86,12 @@ entries; instead, unmerged entries are ignored. Create a new branch named <new_branch> and start it at <start_point>; see linkgit:git-branch[1] for details. +-B:: + Create the branch <new_branch> and start it at <start_point>; + if it already exists, then reset it to <start_point>. This is + equivalent to running "git branch" with "-f"; see + linkgit:git-branch[1] for details. + -t:: --track:: When creating a new branch, set up "upstream" configuration. See @@ -170,7 +187,7 @@ As a special case, the `"@\{-N\}"` syntax for the N-th last branch checks out the branch (instead of detaching). You may also specify `-` which is synonymous with `"@\{-1\}"`. + -As a further special case, you may use `"A...B"` as a shortcut for the +As a further special case, you may use `"A\...B"` as a shortcut for the merge base of `A` and `B` if there is exactly one merge base. You can leave out at most one of `A` and `B`, in which case it defaults to `HEAD`. diff --git a/Documentation/git-clean.txt b/Documentation/git-clean.txt index a81cb6c280..60e38e6e27 100644 --- a/Documentation/git-clean.txt +++ b/Documentation/git-clean.txt @@ -8,7 +8,7 @@ git-clean - Remove untracked files from the working tree SYNOPSIS -------- [verse] -'git clean' [-d] [-f] [-n] [-q] [-x | -X] [--] <path>... +'git clean' [-d] [-f] [-n] [-q] [-e <pattern>] [-x | -X] [--] <path>... DESCRIPTION ----------- @@ -45,6 +45,12 @@ OPTIONS Be quiet, only report errors, but not the files that are successfully removed. +-e <pattern>:: +--exclude=<pattern>:: + Specify special exceptions to not be cleaned. Each <pattern> is + the same form as in $GIT_DIR/info/excludes and this option can be + given multiple times.
+ -x:: Don't use the ignore rules. This allows removing all untracked files, including build products. This can be used (possibly in diff --git a/Documentation/git-commit-tree.txt b/Documentation/git-commit-tree.txt index 61888547a1..349366ee1e 100644 --- a/Documentation/git-commit-tree.txt +++ b/Documentation/git-commit-tree.txt @@ -8,7 +8,7 @@ git-commit-tree - Create a new commit object SYNOPSIS -------- -'git commit-tree' <tree> [-p <parent commit>]\* < changelog +'git commit-tree' <tree> [-p <parent commit>]* < changelog DESCRIPTION ----------- diff --git a/Documentation/git-commit.txt b/Documentation/git-commit.txt index c28603ecf5..42fb1f57b2 100644 --- a/Documentation/git-commit.txt +++ b/Documentation/git-commit.txt @@ -130,11 +130,11 @@ OPTIONS Usually recording a commit that has the exact same tree as its sole parent commit is a mistake, and the command prevents you from making such a commit. This option bypasses the safety, and - is primarily for use by foreign scm interface scripts. + is primarily for use by foreign SCM interface scripts. --allow-empty-message:: Like --allow-empty this command is primarily for use by foreign - scm interface scripts. It allows you to create a commit with an + SCM interface scripts. It allows you to create a commit with an empty commit message without using plumbing commands like linkgit:git-commit-tree[1]. diff --git a/Documentation/git-cvsimport.txt b/Documentation/git-cvsimport.txt index 8bcd875a67..608cd63fc3 100644 --- a/Documentation/git-cvsimport.txt +++ b/Documentation/git-cvsimport.txt @@ -188,7 +188,7 @@ ISSUES ------ Problems related to timestamps: - * If timestamps of commits in the cvs repository are not stable enough + * If timestamps of commits in the CVS repository are not stable enough to be used for ordering commits changes may show up in the wrong order. 
* If any files were ever "cvs import"ed more than once (e.g., import of @@ -201,7 +201,7 @@ Problems related to branches: * Branches on which no commits have been made are not imported. * All files from the branching point are added to a branch even if - never added in cvs. + never added in CVS. * This applies to files added to the source branch *after* a daughter branch was created: if previously no commit was made on the daughter branch they will erroneously be added to the daughter branch in git. diff --git a/Documentation/git-cvsserver.txt b/Documentation/git-cvsserver.txt index 7004dd2dec..f4472c61db 100644 --- a/Documentation/git-cvsserver.txt +++ b/Documentation/git-cvsserver.txt @@ -366,8 +366,8 @@ CRLF Line Ending Conversions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By default the server leaves the '-k' mode blank for all files, -which causes the cvs client to treat them as a text files, subject -to crlf conversion on some platforms. +which causes the CVS client to treat them as a text files, subject +to end-of-line conversion on some platforms. You can make the server use the end-of-line conversion attributes to set the '-k' modes for files by setting the `gitcvs.usecrlfattr` diff --git a/Documentation/git-fast-export.txt b/Documentation/git-fast-export.txt index 98ec6b5871..fcad113276 100644 --- a/Documentation/git-fast-export.txt +++ b/Documentation/git-fast-export.txt @@ -90,10 +90,16 @@ marks the same across runs. resulting stream can only be used by a repository which already contains the necessary objects. +--full-tree:: + This option will cause fast-export to issue a "deleteall" + directive for each commit followed by a full list of all files + in the commit (as opposed to just listing the files which are + different from the commit's first parent). + [git-rev-list-args...]:: A list of arguments, acceptable to 'git rev-parse' and 'git rev-list', that specifies the specific objects and references - to export. 
For example, `master\~10..master` causes the + to export. For example, `master{tilde}10..master` causes the current master reference to be exported along with all objects added since its 10th ancestor commit. diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 77a0a2481a..966ba4f213 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -482,9 +482,11 @@ External data format:: 'M' SP <mode> SP <dataref> SP <path> LF .... + -Here `<dataref>` can be either a mark reference (`:<idnum>`) +Here usually `<dataref>` must be either a mark reference (`:<idnum>`) set by a prior `blob` command, or a full 40-byte SHA-1 of an -existing Git blob object. +existing Git blob object. If `<mode>` is `040000` then +`<dataref>` must be the full 40-byte SHA-1 of an existing +Git tree object or a mark reference set with `--import-marks`. Inline data format:: The data content for the file has not been supplied yet. @@ -509,6 +511,8 @@ in octal. Git only supports the following modes: * `160000`: A gitlink, SHA-1 of the object refers to a commit in another repository. Git links can only be specified by SHA or through a commit mark. They are used to implement submodules. +* `040000`: A subdirectory. Subdirectories can only be specified by + SHA or through a tree mark set with `--import-marks`. In both formats `<path>` is the complete path of the file to be added (if not already existing) or modified (if already existing).
diff --git a/Documentation/git-fmt-merge-msg.txt b/Documentation/git-fmt-merge-msg.txt index a585dbe898..302f56b889 100644 --- a/Documentation/git-fmt-merge-msg.txt +++ b/Documentation/git-fmt-merge-msg.txt @@ -9,8 +9,8 @@ git-fmt-merge-msg - Produce a merge commit message SYNOPSIS -------- [verse] -'git fmt-merge-msg' [--log | --no-log] <$GIT_DIR/FETCH_HEAD -'git fmt-merge-msg' [--log | --no-log] -F <file> +'git fmt-merge-msg' [-m <message>] [--log | --no-log] <$GIT_DIR/FETCH_HEAD +'git fmt-merge-msg' [-m <message>] [--log | --no-log] -F <file> DESCRIPTION ----------- @@ -38,6 +38,11 @@ OPTIONS Synonyms to --log and --no-log; these are deprecated and will be removed in the future. +-m <message>:: +--message <message>:: + Use <message> instead of the branch names for the first line + of the log message. For use with `--log`. + -F <file>:: --file <file>:: Take the list of merged objects from <file> instead of diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt index 390d85ccae..d66fd9d231 100644 --- a/Documentation/git-for-each-ref.txt +++ b/Documentation/git-for-each-ref.txt @@ -9,7 +9,7 @@ SYNOPSIS -------- [verse] 'git for-each-ref' [--count=<count>] [--shell|--perl|--python|--tcl] - [--sort=<key>]\* [--format=<format>] [<pattern>...] + [--sort=<key>]* [--format=<format>] [<pattern>...] DESCRIPTION ----------- diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index 5474dd7f94..dab0a78fa8 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -191,11 +191,11 @@ OPTIONS Examples -------- -git grep 'time_t' \-- '*.[ch]':: +git grep {apostrophe}time_t{apostrophe} \-- {apostrophe}*.[ch]{apostrophe}:: Looks for `time_t` in all tracked .c and .h files in the working directory and its subdirectories. 
-git grep -e \'#define\' --and \( -e MAX_PATH -e PATH_MAX \):: +git grep -e {apostrophe}#define{apostrophe} --and \( -e MAX_PATH -e PATH_MAX \):: Looks for a line that has `#define` and either `MAX_PATH` or `PATH_MAX`. diff --git a/Documentation/git-hash-object.txt b/Documentation/git-hash-object.txt index 6904739a48..51edeecbe5 100644 --- a/Documentation/git-hash-object.txt +++ b/Documentation/git-hash-object.txt @@ -49,7 +49,7 @@ OPTIONS --no-filters:: Hash the contents as is, ignoring any input filter that would - have been chosen by the attributes mechanism, including crlf + have been chosen by the attributes mechanism, including the end-of-line conversion. If the file is read from standard input then this is always implied, unless the --path option is given. diff --git a/Documentation/git-help.txt b/Documentation/git-help.txt index f8df109d07..eccd0ffd38 100644 --- a/Documentation/git-help.txt +++ b/Documentation/git-help.txt @@ -55,9 +55,9 @@ other display programs (see below). + The web browser can be specified using the configuration variable 'help.browser', or 'web.browser' if the former is not set. If none of -these config variables is set, the 'git web--browse' helper script +these config variables is set, the 'git web{litdd}browse' helper script (called by 'git help') will pick a suitable default. See -linkgit:git-web--browse[1] for more information about this. +linkgit:git-web{litdd}browse[1] for more information about this. CONFIGURATION VARIABLES ----------------------- @@ -80,7 +80,7 @@ help.browser, web.browser and browser.<tool>.path The 'help.browser', 'web.browser' and 'browser.<tool>.path' will also be checked if the 'web' format is chosen (either by command line option or configuration variable). See '-w|--web' in the OPTIONS -section above and linkgit:git-web--browse[1]. +section above and linkgit:git-web{litdd}browse[1]. 
man.viewer ~~~~~~~~~~ diff --git a/Documentation/git-instaweb.txt b/Documentation/git-instaweb.txt index 2c3c4d2994..7477ce8fa8 100644 --- a/Documentation/git-instaweb.txt +++ b/Documentation/git-instaweb.txt @@ -44,20 +44,23 @@ OPTIONS -b:: --browser:: The web browser that should be used to view the gitweb - page. This will be passed to the 'git web--browse' helper + page. This will be passed to the 'git web{litdd}browse' helper script along with the URL of the gitweb instance. See - linkgit:git-web--browse[1] for more information about this. If + linkgit:git-web{litdd}browse[1] for more information about this. If the script fails, the URL will be printed to stdout. +start:: --start:: Start the httpd instance and exit. This does not generate any of the configuration files for spawning a new instance. +stop:: --stop:: Stop the httpd instance and exit. This does not generate any of the configuration files for spawning a new instance, nor does it close the browser. +restart:: --restart:: Restart the httpd instance and exit. This does not generate any of the configuration files for spawning a new instance. @@ -79,7 +82,7 @@ You may specify configuration in your .git/config If the configuration variable 'instaweb.browser' is not set, 'web.browser' will be used instead if it is defined. See -linkgit:git-web--browse[1] for more information about this. +linkgit:git-web{litdd}browse[1] for more information about this. Author ------ diff --git a/Documentation/git-log.txt b/Documentation/git-log.txt index e970664fe1..c213bdbdc5 100644 --- a/Documentation/git-log.txt +++ b/Documentation/git-log.txt @@ -55,6 +55,9 @@ OPTIONS paths. With this, the full diff is shown for commits that touch the specified paths; this means that "<path>..." limits only commits, and doesn't limit diff for those commits. ++ +Note that this affects all diff-based output types, e.g. those +produced by --stat etc. --log-size:: Before the log message print out its size in bytes. 
Intended diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt index 3521637b58..15aee2f953 100644 --- a/Documentation/git-ls-files.txt +++ b/Documentation/git-ls-files.txt @@ -10,14 +10,14 @@ SYNOPSIS -------- [verse] 'git ls-files' [-z] [-t] [-v] - (--[cached|deleted|others|ignored|stage|unmerged|killed|modified])\* - (-[c|d|o|i|s|u|k|m])\* + (--[cached|deleted|others|ignored|stage|unmerged|killed|modified])* + (-[c|d|o|i|s|u|k|m])* [-x <pattern>|--exclude=<pattern>] [-X <file>|--exclude-from=<file>] [--exclude-per-directory=<file>] [--exclude-standard] [--error-unmatch] [--with-tree=<tree-ish>] - [--full-name] [--abbrev] [--] [<file>]\* + [--full-name] [--abbrev] [--] [<file>]* DESCRIPTION ----------- @@ -106,8 +106,16 @@ OPTIONS with `-s` or `-u` options does not make any sense. -t:: - Identify the file status with the following tags (followed by - a space) at the start of each line: + This feature is semi-deprecated. For scripting purpose, + linkgit:git-status[1] `--porcelain` and + linkgit:git-diff-files[1] `--name-status` are almost always + superior alternatives, and users should look at + linkgit:git-status[1] `--short` or linkgit:git-diff[1] + `--name-status` for more user-friendly alternatives. ++ +This option identifies the file status with the following tags (followed by +a space) at the start of each line: + H:: cached S:: skip-worktree M:: unmerged @@ -132,6 +140,12 @@ OPTIONS lines, show only a partial prefix. Non default number of digits can be specified with --abbrev=<n>. +--debug:: + After each line that describes a file, add more data about its + cache entry. This is intended to show as much information as + possible for manual inspection; the exact format may change at + any time. + \--:: Do not interpret any more arguments as options. 
diff --git a/Documentation/git-merge-base.txt b/Documentation/git-merge-base.txt index ce5b369985..eedef1bb1a 100644 --- a/Documentation/git-merge-base.txt +++ b/Documentation/git-merge-base.txt @@ -8,7 +8,9 @@ git-merge-base - Find as good common ancestors as possible for a merge SYNOPSIS -------- -'git merge-base' [-a|--all] <commit> <commit>... +[verse] +'git merge-base' [-a|--all] [--octopus] <commit> <commit>... +'git merge-base' --independent <commit>... DESCRIPTION ----------- @@ -20,12 +22,12 @@ that does not have any better common ancestor is a 'best common ancestor', i.e. a 'merge base'. Note that there can be more than one merge base for a pair of commits. -Among the two commits to compute the merge base from, one is specified by -the first commit argument on the command line; the other commit is a -(possibly hypothetical) commit that is a merge across all the remaining -commits on the command line. As the most common special case, specifying only -two commits on the command line means computing the merge base between -the given two commits. +Unless `--octopus` is given, among the two commits to compute the merge +base from, one is specified by the first commit argument on the command +line; the other commit is a (possibly hypothetical) commit that is a merge +across all the remaining commits on the command line. As the most common +special case, specifying only two commits on the command line means +computing the merge base between the given two commits. As a consequence, the 'merge base' is not necessarily contained in each of the commit arguments if more than two commits are specified. This is different @@ -37,6 +39,18 @@ OPTIONS --all:: Output all merge bases for the commits, instead of just one. +--octopus:: + Compute the best common ancestors of all supplied commits, + in preparation for an n-way merge. This mimics the behavior + of 'git show-branch --merge-base'. 
+ +--independent:: + Instead of printing merge bases, print a minimal subset of + the supplied commits with the same ancestors. In other words, + among the commits given, list those which cannot be reached + from any other. This mimics the behavior of 'git show-branch + --independent'. + DISCUSSION ---------- @@ -96,6 +110,12 @@ Documentation -------------- Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>. +See also +-------- +linkgit:git-rev-list[1], +linkgit:git-show-branch[1], +linkgit:git-merge[1] + GIT --- Part of the linkgit:git[1] suite diff --git a/Documentation/git-merge-index.txt b/Documentation/git-merge-index.txt index 4d266de9cc..921b38f183 100644 --- a/Documentation/git-merge-index.txt +++ b/Documentation/git-merge-index.txt @@ -8,7 +8,7 @@ git-merge-index - Run a merge for files needing merging SYNOPSIS -------- -'git merge-index' [-o] [-q] <merge-program> (-a | [--] <file>\*) +'git merge-index' [-o] [-q] <merge-program> (-a | [--] <file>*) DESCRIPTION ----------- diff --git a/Documentation/git-mergetool--lib.txt b/Documentation/git-mergetool--lib.txt index 78eb03f0ae..d8df55362c 100644 --- a/Documentation/git-mergetool--lib.txt +++ b/Documentation/git-mergetool--lib.txt @@ -1,5 +1,5 @@ -git-mergetool--lib(1) -===================== +git-mergetool{litdd}lib(1) +========================== NAME ---- @@ -16,11 +16,11 @@ This is not a command the end user would want to run. Ever. This documentation is meant for people who are studying the Porcelain-ish scripts and/or are writing new ones. -The 'git-mergetool--lib' scriptlet is designed to be sourced (using +The 'git-mergetool{litdd}lib' scriptlet is designed to be sourced (using `.`) by other shell scripts to set up functions for working with git merge tools. -Before sourcing 'git-mergetool--lib', your script must set `TOOL_MODE` +Before sourcing 'git-mergetool{litdd}lib', your script must set `TOOL_MODE` to define the operation mode for the functions listed below. 
'diff' and 'merge' are valid values. diff --git a/Documentation/git-notes.txt b/Documentation/git-notes.txt index 5540af5d16..2981d8c5ef 100644 --- a/Documentation/git-notes.txt +++ b/Documentation/git-notes.txt @@ -129,10 +129,12 @@ OPTIONS is taken to be in `refs/notes/` if it is not qualified. -n:: +--dry-run:: Do not remove anything; just report the object names whose notes would be removed. -v:: +--verbose:: Report all object names whose notes are removed. diff --git a/Documentation/git-prune.txt b/Documentation/git-prune.txt index 15cfb7a8dc..4d673a5686 100644 --- a/Documentation/git-prune.txt +++ b/Documentation/git-prune.txt @@ -31,10 +31,12 @@ OPTIONS ------- -n:: +--dry-run:: Do not remove anything; just report what it would remove. -v:: +--verbose:: Report all removed objects. \--:: diff --git a/Documentation/git-pull.txt b/Documentation/git-pull.txt index ab4de10358..c50f7dcb89 100644 --- a/Documentation/git-pull.txt +++ b/Documentation/git-pull.txt @@ -8,29 +8,72 @@ git-pull - Fetch from and merge with another repository or a local branch SYNOPSIS -------- -'git pull' <options> <repository> <refspec>... +'git pull' [options] [<repository> [<refspec>...]] DESCRIPTION ----------- -Runs 'git fetch' with the given parameters, and calls 'git merge' -to merge the retrieved head(s) into the current branch. -With `--rebase`, calls 'git rebase' instead of 'git merge'. -Note that you can use `.` (current directory) as the -<repository> to pull from the local repository -- this is useful -when merging local branches into the current branch. +Incorporates changes from a remote repository into the current +branch. In its default mode, `git pull` is shorthand for +`git fetch` followed by `git merge FETCH_HEAD`. -Also note that options meant for 'git pull' itself and underlying -'git merge' must be given before the options meant for 'git fetch'. 
+More precisely, 'git pull' runs 'git fetch' with the given +parameters and calls 'git merge' to merge the retrieved branch +heads into the current branch. +With `--rebase`, it runs 'git rebase' instead of 'git merge'. -*Warning*: Running 'git pull' (actually, the underlying 'git merge') +<repository> should be the name of a remote repository as +passed to linkgit:git-fetch[1]. <refspec> can name an +arbitrary remote ref (for example, the name of a tag) or even +a collection of refs with corresponding remote tracking branches +(e.g., refs/heads/*:refs/remotes/origin/*), but usually it is +the name of a branch in the remote repository. + +Default values for <repository> and <branch> are read from the +"remote" and "merge" configuration for the current branch +as set by linkgit:git-branch[1] `--track`. + +Assume the following history exists and the current branch is +"`master`": + +------------ + A---B---C master on origin + / + D---E---F---G master +------------ + +Then "`git pull`" will fetch and replay the changes from the remote +`master` branch since it diverged from the local `master` (i.e., `E`) +until its current commit (`C`) on top of `master` and record the +result in a new commit along with the names of the two parent commits +and a log message from the user describing the changes. + +------------ + A---B---C remotes/origin/master + / \ + D---E---F---G---H master +------------ + +See linkgit:git-merge[1] for details, including how conflicts +are presented and handled. + +In git 1.7.0 or later, to cancel a conflicting merge, use +`git reset --merge`. *Warning*: In older versions of git, running 'git pull' with uncommitted changes is discouraged: while possible, it leaves you -in a state that is hard to back out of in the case of a conflict. +in a state that may be hard to back out of in the case of a conflict. + +If any of the remote changes overlap with local uncommitted changes, +the merge will be automatically cancelled and the work tree untouched. 
+It is generally best to get any local changes in working order before +pulling or stash them away with linkgit:git-stash[1]. OPTIONS ------- +Options meant for 'git pull' itself and the underlying 'git merge' +must be given before the options meant for 'git fetch'. + -q:: --quiet:: This is passed to both underlying git-fetch to squelch reporting of diff --git a/Documentation/git-push.txt b/Documentation/git-push.txt index b68abff28a..020955ff5a 100644 --- a/Documentation/git-push.txt +++ b/Documentation/git-push.txt @@ -200,16 +200,29 @@ summary:: For a successfully pushed ref, the summary shows the old and new values of the ref in a form suitable for using as an argument to `git log` (this is `<old>..<new>` in most cases, and - `<old>...<new>` for forced non-fast-forward updates). For a - failed update, more details are given for the failure. - The string `rejected` indicates that git did not try to send the - ref at all (typically because it is not a fast-forward). The - string `remote rejected` indicates that the remote end refused - the update; this rejection is typically caused by a hook on the - remote side. The string `remote failure` indicates that the - remote end did not report the successful update of the ref - (perhaps because of a temporary error on the remote side, a - break in the network connection, or other transient error). + `<old>\...<new>` for forced non-fast-forward updates). ++ +For a failed update, more details are given: ++ +-- +rejected:: + Git did not try to send the ref at all, typically because it + is not a fast-forward and you did not force the update. + +remote rejected:: + The remote end refused the update. 
Usually caused by a hook + on the remote side, or because the remote repository has one + of the following safety options in effect: + `receive.denyCurrentBranch` (for pushes to the checked out + branch), `receive.denyNonFastForwards` (for forced + non-fast-forward updates), `receive.denyDeletes` or + `receive.denyDeleteCurrent`. See linkgit:git-config[1]. + +remote failure:: + The remote end did not report the successful update of the ref, + perhaps because of a temporary error on the remote side, a + break in the network connection, or other transient error. +-- from:: The name of the local ref being pushed, minus its diff --git a/Documentation/git-read-tree.txt b/Documentation/git-read-tree.txt index f6037c4f6a..2e78da448f 100644 --- a/Documentation/git-read-tree.txt +++ b/Documentation/git-read-tree.txt @@ -412,6 +412,13 @@ turn `core.sparseCheckout` on in order to have sparse checkout support. +BUGS +---- +In order to match a directory with $GIT_DIR/info/sparse-checkout, +a trailing slash must be used. The form without a trailing slash, while +it works with .gitignore, does not work with sparse checkout. + + SEE ALSO -------- linkgit:git-write-tree[1]; linkgit:git-ls-files[1]; diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt index be23ad2359..30e5c0eb14 100644 --- a/Documentation/git-rebase.txt +++ b/Documentation/git-rebase.txt @@ -199,6 +199,9 @@ rebase.stat:: Whether to show a diffstat of what changed upstream since the last rebase. False by default. +rebase.autosquash:: + If set to true enable '--autosquash' option by default. + OPTIONS ------- <newbase>:: @@ -207,7 +210,7 @@ OPTIONS <upstream>. May be any valid commit, and not just an existing branch name. + -As a special case, you may use "A...B" as a shortcut for the +As a special case, you may use "A\...B" as a shortcut for the merge base of A and B if there is exactly one merge base. You can leave out at most one of A and B, in which case it defaults to HEAD.
@@ -250,6 +253,13 @@ on top of the <upstream> branch using the given strategy, using the 'ours' strategy simply discards all patches from the <branch>, which makes little sense. +-X <strategy-option>:: +--strategy-option=<strategy-option>:: + Pass the <strategy-option> through to the merge strategy. + This implies `\--merge` and, if no strategy has been + specified, `-s recursive`. Note the reversal of 'ours' and + 'theirs' as noted above for the `-m` option. + -q:: --quiet:: Be quiet. Implies --no-stat. @@ -326,6 +336,7 @@ idea unless you know what you are doing (see BUGS below). instead. --autosquash:: +--no-autosquash:: When the commit log message begins with "squash! ..." (or "fixup! ..."), and there is a commit whose title begins with the same ..., automatically modify the todo list of rebase -i @@ -334,6 +345,10 @@ idea unless you know what you are doing (see BUGS below). commit from `pick` to `squash` (or `fixup`). + This option is only valid when the '--interactive' option is used. ++ +If the '--autosquash' option is enabled by default using the +configuration variable `rebase.autosquash`, this option can be +used to override and disable this setting. --no-ff:: With --interactive, cherry-pick all rebased commits instead of @@ -459,6 +474,30 @@ sure that the current HEAD is "B", and call $ git rebase -i -p --onto Q O ----------------------------- +Reordering and editing commits usually creates untested intermediate +steps. You may want to check that your history editing did not break +anything by running a test, or at least recompiling at intermediate +points in history by using the "exec" command (shortcut "x"). You may +do so by creating a todo list like this one: + +------------------------------------------- +pick deadbee Implement feature XXX +fixup f1a5c00 Fix to feature XXX +exec make +pick c0ffeee The oneline of the next commit +edit deadbab The oneline of the commit after +exec cd subdir; make test +...
+------------------------------------------- + +The interactive rebase will stop when a command fails (i.e. exits with +non-0 status) to give you an opportunity to fix the problem. You can +continue with `git rebase --continue`. + +The "exec" command launches the command in a shell (the one specified +in `$SHELL`, or the default shell if `$SHELL` is not set), so you can +use shell features (like "cd", ">", ";" ...). The command is run from +the root of the working tree. SPLITTING COMMITS ----------------- diff --git a/Documentation/git-relink.txt b/Documentation/git-relink.txt index 25ff8f9dcb..8a5842bb93 100644 --- a/Documentation/git-relink.txt +++ b/Documentation/git-relink.txt @@ -7,7 +7,7 @@ git-relink - Hardlink common objects in local repositories SYNOPSIS -------- -'git relink' [--safe] <dir> [<dir>]\* <master_dir> +'git relink' [--safe] <dir> [<dir>]* <master_dir> DESCRIPTION ----------- diff --git a/Documentation/git-request-pull.txt b/Documentation/git-request-pull.txt index 19335fddae..400f61f6e2 100644 --- a/Documentation/git-request-pull.txt +++ b/Documentation/git-request-pull.txt @@ -7,7 +7,7 @@ git-request-pull - Generates a summary of pending changes SYNOPSIS -------- -'git request-pull' <start> <url> [<end>] +'git request-pull' [-p] <start> <url> [<end>] DESCRIPTION ----------- @@ -17,6 +17,9 @@ the given URL in the generated summary. OPTIONS ------- +-p:: + Show patch text + <start>:: Commit to start at. diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt index 645f0c1748..9cf31485fe 100644 --- a/Documentation/git-reset.txt +++ b/Documentation/git-reset.txt @@ -8,40 +8,50 @@ git-reset - Reset current HEAD to the specified state SYNOPSIS -------- [verse] -'git reset' [--mixed | --soft | --hard | --merge | --keep] [-q] [<commit>] 'git reset' [-q] [<commit>] [--] <paths>... 'git reset' --patch [<commit>] [--] [<paths>...] 
+'git reset' [--soft | --mixed | --hard | --merge | --keep] [-q] [<commit>] DESCRIPTION ----------- -Sets the current head to the specified commit and optionally resets the -index and working tree to match. - -This command is useful if you notice some small error in a recent -commit (or set of commits) and want to redo that part without showing -the undo in the history. - -If you want to undo a commit other than the latest on a branch, -linkgit:git-revert[1] is your friend. - -The second and third forms with 'paths' and/or --patch are used to -revert selected paths in the index from a given commit, without moving -HEAD. - +In the first and second form, copy entries from <commit> to the index. +In the third form, set the current branch to <commit>, optionally +modifying index and worktree to match. The <commit> defaults to HEAD +in all forms. + +'git reset' [-q] [<commit>] [--] <paths>...:: + This form resets the index entries for all <paths> to their + state at the <commit>. (It does not affect the worktree, nor + the current branch.) ++ +This means that `git reset <paths>` is the opposite of `git add +<paths>`. -OPTIONS -------- ---mixed:: - Resets the index but not the working tree (i.e., the changed files - are preserved but not marked for commit) and reports what has not - been updated. This is the default action. +'git reset' --patch|-p [<commit>] [--] [<paths>...]:: + Interactively select hunks in the difference between the index + and <commit> (defaults to HEAD). The chosen hunks are applied + in reverse to the index. ++ +This means that `git reset -p` is the opposite of `git add -p` (see +linkgit:git-add[1]). +'git reset' [--<mode>] [<commit>]:: + This form points the current branch to <commit> and then + updates index and working tree according to <mode>, which must + be one of the following: ++ +-- --soft:: Does not touch the index file nor the working tree at all, but requires them to be in a good order. 
This leaves all your changed files "Changes to be committed", as 'git status' would put it. +--mixed:: + Resets the index but not the working tree (i.e., the changed files + are preserved but not marked for commit) and reports what has not + been updated. This is the default action. + --hard:: Matches the working tree and index to that of the tree being switched to. Any changes to tracked files in the working tree @@ -59,132 +69,46 @@ OPTIONS the given commit. If a file that is different between the current commit and the given commit has local changes, reset is aborted. +-- --p:: ---patch:: - Interactively select hunks in the difference between the index - and <commit> (defaults to HEAD). The chosen hunks are applied - in reverse to the index. -+ -This means that `git reset -p` is the opposite of `git add -p` (see -linkgit:git-add[1]). +If you want to undo a commit other than the latest on a branch, +linkgit:git-revert[1] is your friend. + + +OPTIONS +------- -q:: --quiet:: Be quiet, only report errors. -<commit>:: - Commit to make the current HEAD. If not given defaults to HEAD. - -DISCUSSION ----------- -The tables below show what happens when running: - ----------- -git reset --option target ----------- - -to reset the HEAD to another commit (`target`) with the different -reset options depending on the state of the files. - -In these tables, A, B, C and D are some different states of a -file. For example, the first line of the first table means that if a -file is in state A in the working tree, in state B in the index, in -state C in HEAD and in state D in the target, then "git reset --soft -target" will put the file in state A in the working tree, in state B -in the index and in state D in HEAD. 
- - working index HEAD target working index HEAD - ---------------------------------------------------- - A B C D --soft A B D - --mixed A D D - --hard D D D - --merge (disallowed) - --keep (disallowed) - - working index HEAD target working index HEAD - ---------------------------------------------------- - A B C C --soft A B C - --mixed A C C - --hard C C C - --merge (disallowed) - --keep A C C - - working index HEAD target working index HEAD - ---------------------------------------------------- - B B C D --soft B B D - --mixed B D D - --hard D D D - --merge D D D - --keep (disallowed) - - working index HEAD target working index HEAD - ---------------------------------------------------- - B B C C --soft B B C - --mixed B C C - --hard C C C - --merge C C C - --keep B C C - - working index HEAD target working index HEAD - ---------------------------------------------------- - B C C D --soft B C D - --mixed B D D - --hard D D D - --merge (disallowed) - --keep (disallowed) - - working index HEAD target working index HEAD - ---------------------------------------------------- - B C C C --soft B C C - --mixed B C C - --hard C C C - --merge B C C - --keep B C C - -"reset --merge" is meant to be used when resetting out of a conflicted -merge. Any mergy operation guarantees that the work tree file that is -involved in the merge does not have local change wrt the index before -it starts, and that it writes the result out to the work tree. So if -we see some difference between the index and the target and also -between the index and the work tree, then it means that we are not -resetting out from a state that a mergy operation left after failing -with a conflict. That is why we disallow --merge option in this case. - -"reset --keep" is meant to be used when removing some of the last -commits in the current branch while keeping changes in the working -tree. 
If there could be conflicts between the changes in the commit we -want to remove and the changes in the working tree we want to keep, -the reset is disallowed. That's why it is disallowed if there are both -changes between the working tree and HEAD, and between HEAD and the -target. To be safe, it is also disallowed when there are unmerged -entries. - -The following tables show what happens when there are unmerged -entries: - - working index HEAD target working index HEAD - ---------------------------------------------------- - X U A B --soft (disallowed) - --mixed X B B - --hard B B B - --merge B B B - --keep (disallowed) - - working index HEAD target working index HEAD - ---------------------------------------------------- - X U A A --soft (disallowed) - --mixed X A A - --hard A A A - --merge A A A - --keep (disallowed) - -X means any state and U means an unmerged index. - -Examples +EXAMPLES -------- +Undo add:: ++ +------------ +$ edit <1> +$ git add frotz.c filfre.c +$ mailx <2> +$ git reset <3> +$ git pull git://info.example.com/ nitfol <4> +------------ ++ +<1> You are happily working on something, and find the changes +in these files are in good order. You do not want to see them +when you run "git diff", because you plan to work on other files +and changes with these files are distracting. +<2> Somebody asks you to pull, and the changes sounds worthy of merging. +<3> However, you already dirtied the index (i.e. your index does +not match the HEAD commit). But you know the pull you are going +to make does not affect frotz.c nor filfre.c, so you revert the +index changes for these two files. Your changes in working tree +remain there. +<4> Then you can pull and merge, leaving frotz.c and filfre.c +changes still in the working tree. + Undo a commit and redo:: + ------------ @@ -204,19 +128,6 @@ edit the message further, you can give -C option instead. + See also the --amend option to linkgit:git-commit[1]. 
-Undo commits permanently:: -+ ------------- -$ git commit ... -$ git reset --hard HEAD~3 <1> ------------- -+ -<1> The last three commits (HEAD, HEAD^, and HEAD~2) were bad -and you do not want to ever see them again. Do *not* do this if -you have already given these commits to somebody else. (See the -"RECOVERING FROM UPSTREAM REBASE" section in linkgit:git-rebase[1] for -the implications of doing so.) - Undo a commit, making it a topic branch:: + ------------ @@ -232,28 +143,18 @@ current HEAD. <2> Rewind the master branch to get rid of those three commits. <3> Switch to "topic/wip" branch and keep working. -Undo add:: +Undo commits permanently:: + ------------ -$ edit <1> -$ git add frotz.c filfre.c -$ mailx <2> -$ git reset <3> -$ git pull git://info.example.com/ nitfol <4> +$ git commit ... +$ git reset --hard HEAD~3 <1> ------------ + -<1> You are happily working on something, and find the changes -in these files are in good order. You do not want to see them -when you run "git diff", because you plan to work on other files -and changes with these files are distracting. -<2> Somebody asks you to pull, and the changes sounds worthy of merging. -<3> However, you already dirtied the index (i.e. your index does -not match the HEAD commit). But you know the pull you are going -to make does not affect frotz.c nor filfre.c, so you revert the -index changes for these two files. Your changes in working tree -remain there. -<4> Then you can pull and merge, leaving frotz.c and filfre.c -changes still in the working tree. +<1> The last three commits (HEAD, HEAD^, and HEAD~2) were bad +and you do not want to ever see them again. Do *not* do this if +you have already given these commits to somebody else. (See the +"RECOVERING FROM UPSTREAM REBASE" section in linkgit:git-rebase[1] for +the implications of doing so.) 
Undo a merge or pull:: + @@ -355,7 +256,7 @@ Keep changes in working tree while discarding some previous commits:: Suppose you are working on something and you commit it, and then you continue working a bit more, but now you think that what you have in your working tree should be in another branch that has nothing to do -with what you commited previously. You can start a new branch and +with what you committed previously. You can start a new branch and reset it while keeping the changes in your work tree. + ------------ @@ -376,6 +277,114 @@ $ git reset --keep start <3> <3> But you can use "reset --keep" to remove the unwanted commit after you switched to "branch2". + +DISCUSSION +---------- + +The tables below show what happens when running: + +---------- +git reset --option target +---------- + +to reset the HEAD to another commit (`target`) with the different +reset options depending on the state of the files. + +In these tables, A, B, C and D are some different states of a +file. For example, the first line of the first table means that if a +file is in state A in the working tree, in state B in the index, in +state C in HEAD and in state D in the target, then "git reset --soft +target" will put the file in state A in the working tree, in state B +in the index and in state D in HEAD. 
+ + working index HEAD target working index HEAD + ---------------------------------------------------- + A B C D --soft A B D + --mixed A D D + --hard D D D + --merge (disallowed) + --keep (disallowed) + + working index HEAD target working index HEAD + ---------------------------------------------------- + A B C C --soft A B C + --mixed A C C + --hard C C C + --merge (disallowed) + --keep A C C + + working index HEAD target working index HEAD + ---------------------------------------------------- + B B C D --soft B B D + --mixed B D D + --hard D D D + --merge D D D + --keep (disallowed) + + working index HEAD target working index HEAD + ---------------------------------------------------- + B B C C --soft B B C + --mixed B C C + --hard C C C + --merge C C C + --keep B C C + + working index HEAD target working index HEAD + ---------------------------------------------------- + B C C D --soft B C D + --mixed B D D + --hard D D D + --merge (disallowed) + --keep (disallowed) + + working index HEAD target working index HEAD + ---------------------------------------------------- + B C C C --soft B C C + --mixed B C C + --hard C C C + --merge B C C + --keep B C C + +"reset --merge" is meant to be used when resetting out of a conflicted +merge. Any mergy operation guarantees that the work tree file that is +involved in the merge does not have local change wrt the index before +it starts, and that it writes the result out to the work tree. So if +we see some difference between the index and the target and also +between the index and the work tree, then it means that we are not +resetting out from a state that a mergy operation left after failing +with a conflict. That is why we disallow --merge option in this case. + +"reset --keep" is meant to be used when removing some of the last +commits in the current branch while keeping changes in the working +tree. 
If there could be conflicts between the changes in the commit we +want to remove and the changes in the working tree we want to keep, +the reset is disallowed. That's why it is disallowed if there are both +changes between the working tree and HEAD, and between HEAD and the +target. To be safe, it is also disallowed when there are unmerged +entries. + +The following tables show what happens when there are unmerged +entries: + + working index HEAD target working index HEAD + ---------------------------------------------------- + X U A B --soft (disallowed) + --mixed X B B + --hard B B B + --merge B B B + --keep (disallowed) + + working index HEAD target working index HEAD + ---------------------------------------------------- + X U A A --soft (disallowed) + --mixed X A A + --hard A A A + --merge A A A + --keep (disallowed) + +X means any state and U means an unmerged index. + + Author ------ Written by Junio C Hamano <gitster@pobox.com> and Linus Torvalds <torvalds@osdl.org> diff --git a/Documentation/git-rev-parse.txt b/Documentation/git-rev-parse.txt index 0727f431c6..341ca90c6e 100644 --- a/Documentation/git-rev-parse.txt +++ b/Documentation/git-rev-parse.txt @@ -74,7 +74,7 @@ OPTIONS properly quoted for consumption by shell. Useful when you expect your parameter to contain whitespaces and newlines (e.g. when using pickaxe `-S` with - 'git diff-\*'). In contrast to the `--sq-quote` option, + 'git diff-{asterisk}'). In contrast to the `--sq-quote` option, the command input is still interpreted as usual. --not:: @@ -112,14 +112,15 @@ OPTIONS + If a `pattern` is given, only refs matching the given shell glob are shown. If the pattern does not contain a globbing character (`?`, -`\*`, or `[`), it is turned into a prefix match by appending `/\*`. +`{asterisk}`, or `[`), it is turned into a prefix match by +appending `/{asterisk}`. --glob=pattern:: Show all refs matching the shell glob pattern `pattern`. 
If the pattern does not start with `refs/`, this is automatically prepended. If the pattern does not contain a globbing - character (`?`, `\*`, or `[`), it is turned into a prefix - match by appending `/\*`. + character (`?`, `{asterisk}`, or `[`), it is turned into a prefix + match by appending `/{asterisk}`. --show-toplevel:: Show the absolute path of the top-level directory. @@ -184,10 +185,13 @@ scripts the same facilities C builtins have. It works as an option normalizer (e.g. splits single switches aggregate values), a bit like `getopt(1)` does. It takes on the standard input the specification of the options to parse and -understand, and echoes on the standard output a line suitable for `sh(1)` `eval` +understand, and echoes on the standard output a string suitable for `sh(1)` `eval` to replace the arguments with normalized ones. In case of error, it outputs usage on the standard error stream, and exits with code 129. +Note: Make sure you quote the result when passing it to `eval`. See +below for an example. + Input Format ~~~~~~~~~~~~ @@ -244,7 +248,7 @@ bar= some cool option --bar with an argument An option group Header C? option C with an optional argument" -eval `echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?` +eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)" ------------ SQ-QUOTE diff --git a/Documentation/git-rm.txt b/Documentation/git-rm.txt index c21d19e573..71e3d9fc23 100644 --- a/Documentation/git-rm.txt +++ b/Documentation/git-rm.txt @@ -78,7 +78,8 @@ a file that you have not told git about does not remove that file. File globbing matches across directory boundaries. Thus, given two directories `d` and `d2`, there is a difference between -using `git rm \'d\*\'` and `git rm \'d/\*\'`, as the former will +using `git rm {apostrophe}d{asterisk}{apostrophe}` and +`git rm {apostrophe}d/{asterisk}{apostrophe}`, as the former will also remove all of directory `d2`. 
REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM @@ -135,11 +136,11 @@ git diff --name-only --diff-filter=D -z | xargs -0 git rm --cached EXAMPLES -------- -git rm Documentation/\\*.txt:: - Removes all `\*.txt` files from the index that are under the +git rm Documentation/\*.txt:: + Removes all `*.txt` files from the index that are under the `Documentation` directory and any of its subdirectories. + -Note that the asterisk `\*` is quoted from the shell in this +Note that the asterisk `*` is quoted from the shell in this example; this lets git, and not the shell, expand the pathnames of files and subdirectories under the `Documentation/` directory. diff --git a/Documentation/git-show-branch.txt b/Documentation/git-show-branch.txt index 81ba29669c..6453263340 100644 --- a/Documentation/git-show-branch.txt +++ b/Documentation/git-show-branch.txt @@ -168,10 +168,10 @@ $ git show-branch master fixes mhf ------------------------------------------------ These three branches all forked from a common commit, [master], -whose commit message is "Add \'git show-branch\'". The "fixes" -branch adds one commit "Introduce "reset type" flag to "git reset"". -The "mhf" branch adds many other commits. The current branch -is "master". +whose commit message is "Add {apostrophe}git show-branch{apostrophe}". +The "fixes" branch adds one commit "Introduce "reset type" flag to +"git reset"". The "mhf" branch adds many other commits. +The current branch is "master". 
EXAMPLE diff --git a/Documentation/git-show-ref.txt b/Documentation/git-show-ref.txt index 3f9d9c6db3..4696af7433 100644 --- a/Documentation/git-show-ref.txt +++ b/Documentation/git-show-ref.txt @@ -73,8 +73,8 @@ OPTIONS --exclude-existing[=<pattern>]:: Make 'git show-ref' act as a filter that reads refs from stdin of the - form "^(?:<anything>\s)?<refname>(?:\^\{\})?$" and performs the - following actions on each: + form "^(?:<anything>\s)?<refname>(?:{backslash}{caret}\{\})?$" + and performs the following actions on each: (1) strip "^{}" at the end of line if any; (2) ignore if pattern is provided and does not head-match refname; (3) warn if refname is not a well-formed refname and skip; @@ -163,9 +163,15 @@ flag, so you can do to get a listing of all tags together with what they dereference. +FILES +----- +`.git/refs/*`, `.git/packed-refs` + SEE ALSO -------- -linkgit:git-ls-remote[1] +linkgit:git-ls-remote[1], +linkgit:git-update-ref[1], +linkgit:gitrepository-layout[5] AUTHORS ------- diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt index 2fd054c104..dae190a5f2 100644 --- a/Documentation/git-status.txt +++ b/Documentation/git-status.txt @@ -55,7 +55,11 @@ specified. --ignore-submodules[=<when>]:: Ignore changes to submodules when looking for changes. <when> can be - either "untracked", "dirty" or "all", which is the default. When + either "none", "untracked", "dirty" or "all", which is the default. + Using "none" will consider the submodule modified when it either contains + untracked or modified files or its HEAD differs from the commit recorded + in the superproject and can be used to override any settings of the + 'ignore' option in linkgit:git-config[1] or linkgit:gitmodules[5]. When "untracked" is used submodules are not considered dirty when they only contain untracked content (but they are still scanned for modified content). 
Using "dirty" ignores all changes to the work tree of submodules, diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt index b09bd9761f..4b84d08fc8 100644 --- a/Documentation/git-svn.txt +++ b/Documentation/git-svn.txt @@ -646,6 +646,12 @@ svn.brokenSymlinkWorkaround:: revision fetched. If unset, 'git svn' assumes this option to be "true". +svn.pathnameencoding:: + This instructs git svn to recode pathnames to a given encoding. + It can be used by windows users and by those who work in non-utf8 + locales to avoid corrupted file names with non-ASCII characters. + Valid encodings are the ones supported by Perl's Encode module. + Since the noMetadata, rewriteRoot, rewriteUUID, useSvnsyncProps and useSvmProps options all affect the metadata generated and used by 'git svn'; they *must* be set in the configuration file before any history is imported diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt index 765d4b312e..74d1d49dbf 100644 --- a/Documentation/git-update-index.txt +++ b/Documentation/git-update-index.txt @@ -12,7 +12,7 @@ SYNOPSIS 'git update-index' [--add] [--remove | --force-remove] [--replace] [--refresh] [-q] [--unmerged] [--ignore-missing] - [--cacheinfo <mode> <object> <file>]\* + [--cacheinfo <mode> <object> <file>]* [--chmod=(+|-)x] [--assume-unchanged | --no-assume-unchanged] [--skip-worktree | --no-skip-worktree] @@ -21,7 +21,7 @@ SYNOPSIS [--info-only] [--index-info] [-z] [--stdin] [--verbose] - [--] [<file>]\* + [--] [<file>]* DESCRIPTION ----------- diff --git a/Documentation/git-web--browse.txt b/Documentation/git-web--browse.txt index 75720491b2..e1586c78c3 100644 --- a/Documentation/git-web--browse.txt +++ b/Documentation/git-web--browse.txt @@ -1,5 +1,5 @@ -git-web--browse(1) -================== +git-web{litdd}browse(1) +======================= NAME ---- @@ -7,7 +7,7 @@ git-web--browse - git helper script to launch a web browser SYNOPSIS -------- -'git web--browse' [OPTIONS] URL/FILE ... 
+'git web{litdd}browse' [OPTIONS] URL/FILE ... DESCRIPTION ----------- @@ -71,7 +71,7 @@ browser.<tool>.cmd When the browser, specified by options or configuration variables, is not among the supported ones, then the corresponding 'browser.<tool>.cmd' configuration variable will be looked up. If this -variable exists then 'git web--browse' will treat the specified tool +variable exists then 'git web{litdd}browse' will treat the specified tool as a custom command and will use a shell eval to run the command with the URLs passed as arguments. diff --git a/Documentation/git.txt b/Documentation/git.txt index 27ece58857..93e3b07c6c 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -44,20 +44,25 @@ unreleased) version of git, that is available from 'master' branch of the `git.git` repository. Documentation for older releases are available here: -* link:v1.7.2/git.html[documentation for release 1.7.2] +* link:v1.7.2.3/git.html[documentation for release 1.7.2.3] * release notes for + link:RelNotes-1.7.2.3.txt[1.7.2.3], + link:RelNotes-1.7.2.2.txt[1.7.2.2], + link:RelNotes-1.7.2.1.txt[1.7.2.1], link:RelNotes-1.7.2.txt[1.7.2]. -* link:v1.7.1.1/git.html[documentation for release 1.7.1.1] +* link:v1.7.1.2/git.html[documentation for release 1.7.1.2] * release notes for + link:RelNotes-1.7.1.2.txt[1.7.1.2], link:RelNotes-1.7.1.1.txt[1.7.1.1], link:RelNotes-1.7.1.txt[1.7.1]. -* link:v1.7.0.6/git.html[documentation for release 1.7.0.6] +* link:v1.7.0.7/git.html[documentation for release 1.7.0.7] * release notes for + link:RelNotes-1.7.0.7.txt[1.7.0.7], link:RelNotes-1.7.0.6.txt[1.7.0.6], link:RelNotes-1.7.0.5.txt[1.7.0.5], link:RelNotes-1.7.0.4.txt[1.7.0.4], @@ -724,6 +729,13 @@ The documentation for git suite was started by David Greaves <david@dgreaves.com>, and later enhanced greatly by the contributors on the git-list <git@vger.kernel.org>. 
+Reporting Bugs +-------------- + +Report bugs to the Git mailing list <git@vger.kernel.org> where the +development and maintenance is primarily done. You do not have to be +subscribed to the list to send a message there. + SEE ALSO -------- linkgit:gittutorial[7], linkgit:gittutorial-2[7], diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 564586b943..e5a27d875e 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -317,6 +317,17 @@ command is "cat"). smudge = cat ------------------------ +For best results, `clean` should not alter its output further if it is +run twice ("clean->clean" should be equivalent to "clean"), and +multiple `smudge` commands should not alter `clean`'s output +("smudge->smudge->clean" should be equivalent to "clean"). See the +section on merging below. + +The "indent" filter is well-behaved in this regard: it will not modify +input that is already correctly indented. In this case, the lack of a +smudge filter means that the clean filter _must_ accept its own output +without modifying it. + Interaction between checkin/checkout attributes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -331,6 +342,29 @@ In the check-out codepath, the blob content is first converted with `text`, and then `ident` and fed to `filter`. +Merging branches with differing checkin/checkout attributes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you have added attributes to a file that cause the canonical +repository format for that file to change, such as adding a +clean/smudge filter or text/eol/ident attributes, merging anything +where the attribute is not in place would normally cause merge +conflicts. + +To prevent these unnecessary merge conflicts, git can be told to run a +virtual check-out and check-in of all three stages of a file when +resolving a three-way merge by setting the `merge.renormalize` +configuration variable. 
This prevents changes caused by check-in +conversion from causing spurious merge conflicts when a converted file +is merged with an unconverted file. + +As long as a "smudge->clean" results in the same output as a "clean" +even on files that are already smudged, this strategy will +automatically resolve all filter-related conflicts. Filters that do +not act in this way may cause additional merge conflicts that must be +resolved manually. + + Generating diff text ~~~~~~~~~~~~~~~~~~~~ @@ -441,6 +475,8 @@ patterns are available: - `cpp` suitable for source code in the C and C++ languages. +- `csharp` suitable for source code in the C# language. + - `html` suitable for HTML/XHTML documents. - `java` suitable for source code in the Java language. diff --git a/Documentation/gitcore-tutorial.txt b/Documentation/gitcore-tutorial.txt index ed3ddc92cb..5e9c5ebba3 100644 --- a/Documentation/gitcore-tutorial.txt +++ b/Documentation/gitcore-tutorial.txt @@ -110,7 +110,7 @@ An 'object' is identified by its 160-bit SHA1 hash, aka 'object name', and a reference to an object is always the 40-byte hex representation of that SHA1 name. The files in the `refs` subdirectory are expected to contain these hex references -(usually with a final `\'\n\'` at the end), and you should thus +(usually with a final `\n` at the end), and you should thus expect to see a number of 41-byte files containing these references in these `refs` subdirectories when you actually start populating your tree. @@ -310,7 +310,7 @@ and this will just output the name of the resulting tree, in this case ---------------- which is another incomprehensible object name. Again, if you want to, -you can use `git cat-file -t 8988d\...` to see that this time the object +you can use `git cat-file -t 8988d...` to see that this time the object is not a "blob" object, but a "tree" object (you can also use `git cat-file` to actually output the raw object contents, but you'll see mainly a binary mess, so that's less interesting). 
@@ -436,8 +436,8 @@ $ git update-index hello (note how we didn't need the `\--add` flag this time, since git knew about the file already). -Note what happens to the different 'git diff-\*' versions here. After -we've updated `hello` in the index, `git diff-files -p` now shows no +Note what happens to the different 'git diff-{asterisk}' versions here. +After we've updated `hello` in the index, `git diff-files -p` now shows no differences, but `git diff-index -p HEAD` still *does* show that the current state is different from the state we committed. In fact, now 'git diff-index' shows the same difference whether we use the `--cached` @@ -494,7 +494,7 @@ and it will show what the last commit (in `HEAD`) actually changed. [NOTE] ============ Here is an ASCII art by Jon Loeliger that illustrates how -various diff-\* commands compare things. +various 'diff-{asterisk}' commands compare things. diff-tree +----+ @@ -958,11 +958,11 @@ $ git show-branch --topo-order --more=1 master mybranch The first two lines indicate that it is showing the two branches and the first line of the commit log message from their top-of-the-tree commits, you are currently on `master` branch -(notice the asterisk `\*` character), and the first column for +(notice the asterisk `{asterisk}` character), and the first column for the later output lines is used to show commits contained in the `master` branch, and the second column for the `mybranch` branch. Three commits are shown along with their log messages. -All of them have non blank characters in the first column (`*` +All of them have non blank characters in the first column (`{asterisk}` shows an ordinary commit on the current branch, `-` is a merge commit), which means they are now part of the `master` branch. 
Only the "Some work" commit has the plus `+` character in the second column, @@ -1092,7 +1092,7 @@ Downloader from http and https URL first obtains the topmost commit object name from the remote site by looking at the specified refname under `repo.git/refs/` directory, and then tries to obtain the -commit object by downloading from `repo.git/objects/xx/xxx\...` +commit object by downloading from `repo.git/objects/xx/xxx...` using the object name of that commit object. Then it reads the commit object to find out its parent commits and the associate tree object; it repeats this process until it gets all the @@ -1420,7 +1420,7 @@ packed, and stores the packed file in `.git/objects/pack` directory. [NOTE] -You will see two files, `pack-\*.pack` and `pack-\*.idx`, +You will see two files, `pack-{asterisk}.pack` and `pack-{asterisk}.idx`, in `.git/objects/pack` directory. They are closely related to each other, and if you ever copy them by hand to a different repository for whatever reason, you should make sure you copy diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index e10fa88b8c..7dc2e8b0bc 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -90,12 +90,12 @@ Patterns have the following format: - Otherwise, git treats the pattern as a shell glob suitable for consumption by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will not match a / in the pathname. - For example, "Documentation/\*.html" matches + For example, "Documentation/{asterisk}.html" matches "Documentation/git.html" but not "Documentation/ppc/ppc.html" or "tools/perf/Documentation/perf.html". - A leading slash matches the beginning of the pathname. - For example, "/*.c" matches "cat-file.c" but not + For example, "/{asterisk}.c" matches "cat-file.c" but not "mozilla-sha1/sha1.c". 
An example: diff --git a/Documentation/gitmodules.txt b/Documentation/gitmodules.txt index 72a13d18e0..bcffd95ada 100644 --- a/Documentation/gitmodules.txt +++ b/Documentation/gitmodules.txt @@ -44,6 +44,21 @@ submodule.<name>.update:: This config option is overridden if 'git submodule update' is given the '--merge' or '--rebase' options. +submodule.<name>.ignore:: + Defines under what circumstances "git status" and the diff family show + a submodule as modified. When set to "all", it will never be considered + modified; "dirty" will ignore all changes to the submodule's work tree and + take only differences between the HEAD of the submodule and the commit + recorded in the superproject into account. "untracked" will additionally + let submodules with modified tracked files in their work tree show up. + Using "none" (the default when this option is not set) also shows + submodules that have untracked files in their work tree as changed. + If this option is also present in the submodule's entry in .git/config of + the superproject, the setting there will override the one found in + .gitmodules. + Both settings can be overridden on the command line by using the + "--ignore-submodules" option. + EXAMPLES -------- diff --git a/Documentation/gitrepository-layout.txt b/Documentation/gitrepository-layout.txt index 3cd32d6803..eb3d040783 100644 --- a/Documentation/gitrepository-layout.txt +++ b/Documentation/gitrepository-layout.txt @@ -16,7 +16,7 @@ You may find these things in your git repository (`.git` directory for a repository associated with your working tree, or `<project>.git` directory for a public 'bare' repository. It is also possible to have a working tree where `.git` is a plain -ascii file containing `gitdir: <path>`, i.e. the path to the +ASCII file containing `gitdir: <path>`, i.e. the path to the real git repository). 
objects:: diff --git a/Documentation/howto/revert-a-faulty-merge.txt b/Documentation/howto/revert-a-faulty-merge.txt index ff5c0bc27a..6fd711996a 100644 --- a/Documentation/howto/revert-a-faulty-merge.txt +++ b/Documentation/howto/revert-a-faulty-merge.txt @@ -229,7 +229,7 @@ reverting W. Mainline's history would look like this: A---B---C But if you don't actually need to change commit A, then you need some way to -recreate it as a new commit with the same changes in it. The rebase commmand's +recreate it as a new commit with the same changes in it. The rebase command's --no-ff option provides a way to do this: $ git rebase [-i] --no-ff P diff --git a/Documentation/howto/revert-branch-rebase.txt b/Documentation/howto/revert-branch-rebase.txt index 8c32da6deb..093c656048 100644 --- a/Documentation/howto/revert-branch-rebase.txt +++ b/Documentation/howto/revert-branch-rebase.txt @@ -112,25 +112,19 @@ $ git tag pu-anchor pu $ git rebase master * Applying: Redo "revert" using three-way merge machinery. First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. * Applying: Remove git-apply-patch-script. First trying simple merge strategy to cherry-pick. Simple cherry-pick fails; trying Automatic cherry-pick. Removing Documentation/git-apply-patch-script.txt Removing git-apply-patch-script -Finished one cherry-pick. * Applying: Document "git cherry-pick" and "git revert" First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. * Applying: mailinfo and applymbox updates First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. * Applying: Show commits in topo order and name all commits. First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. * Applying: More documentation updates. First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. 
------------------------------------------------ The temporary tag 'pu-anchor' is me just being careful, in case 'git diff --git a/Documentation/install-webdoc.sh b/Documentation/install-webdoc.sh index 34d02a2418..37e67d1a14 100755 --- a/Documentation/install-webdoc.sh +++ b/Documentation/install-webdoc.sh @@ -12,7 +12,7 @@ do then : did not match elif test -f "$T/$h" && - $DIFF -u -I'Last updated [0-9][0-9]-[A-Z][a-z][a-z]-' "$T/$h" "$h" + $DIFF -u -I'^Last updated ' "$T/$h" "$h" then :; # up to date else diff --git a/Documentation/merge-config.txt b/Documentation/merge-config.txt index a403155052..b72f533970 100644 --- a/Documentation/merge-config.txt +++ b/Documentation/merge-config.txt @@ -15,6 +15,16 @@ merge.renameLimit:: during a merge; if not specified, defaults to the value of diff.renameLimit. +merge.renormalize:: + Tell git that canonical representation of files in the + repository has changed over time (e.g. earlier commits record + text files with CRLF line endings, but recent ones use LF line + endings). In such a repository, git can convert the data + recorded in commits to a canonical form before performing a + merge to reduce unnecessary conflicts. For more information, + see section "Merging branches with differing checkin/checkout + attributes" in linkgit:gitattributes[5]. + merge.stat:: Whether to print the diffstat between ORIG_HEAD and the merge result at the end of the merge. True by default. diff --git a/Documentation/merge-strategies.txt b/Documentation/merge-strategies.txt index a5bc1dbb95..049313d601 100644 --- a/Documentation/merge-strategies.txt +++ b/Documentation/merge-strategies.txt @@ -40,6 +40,18 @@ the other tree did, declaring 'our' history contains all that happened in it. theirs;; This is opposite of 'ours'. +renormalize;; + This runs a virtual check-out and check-in of all three stages + of a file when resolving a three-way merge. 
This option is + meant to be used when merging branches with different clean + filters or end-of-line normalization rules. See "Merging + branches with differing checkin/checkout attributes" in + linkgit:gitattributes[5] for details. + +no-renormalize;; + Disables the `renormalize` option. This overrides the + `merge.renormalize` configuration variable. + subtree[=path];; This option is a more advanced form of 'subtree' strategy, where the strategy makes a guess on how two trees must be shifted to diff --git a/Documentation/pretty-options.txt b/Documentation/pretty-options.txt index d78e121c76..9b6f3899ec 100644 --- a/Documentation/pretty-options.txt +++ b/Documentation/pretty-options.txt @@ -1,5 +1,5 @@ --pretty[='<format>']:: ---format[='<format>']:: +--format='<format>':: Pretty-print the contents of the commit logs in a given format, where '<format>' can be one of 'oneline', 'short', 'medium', diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index cc562a057a..e2237ae4a0 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -321,7 +321,7 @@ excluded from the output. reflog entries from the most recent one to older ones. When this option is used you cannot specify commits to exclude (that is, '{caret}commit', 'commit1..commit2', - nor 'commit1...commit2' notations cannot be used). + nor 'commit1\...commit2' notations cannot be used). 
+ With '\--pretty' format other than oneline (for obvious reasons), this causes the output to have two extra lines of information diff --git a/Documentation/technical/api-merge.txt b/Documentation/technical/api-merge.txt new file mode 100644 index 0000000000..a7e050bb7a --- /dev/null +++ b/Documentation/technical/api-merge.txt @@ -0,0 +1,73 @@ +merge API +========= + +The merge API helps a program to reconcile two competing sets of +improvements to some files (e.g., unregistered changes from the work +tree versus changes involved in switching to a new branch), reporting +conflicts if found. The library called through this API is +responsible for a few things. + + * determining which trees to merge (recursive ancestor consolidation); + + * lining up corresponding files in the trees to be merged (rename + detection, subtree shifting), reporting edge cases like add/add + and rename/rename conflicts to the user; + + * performing a three-way merge of corresponding files, taking + path-specific merge drivers (specified in `.gitattributes`) + into account. + +Low-level (single file) merge +----------------------------- + +`ll_merge`:: + + Perform a three-way single-file merge in core. This is + a thin wrapper around `xdl_merge` that takes the path and + any merge backend specified in `.gitattributes` or + `.git/info/attributes` into account. Returns 0 for a + clean merge. + +The caller: + +1. allocates an mmbuffer_t variable for the result; +2. allocates and fills variables with the file's original content + and two modified versions (using `read_mmfile`, for example); +3. calls ll_merge(); +4. reads the output from result_buf.ptr and result_buf.size; +5. releases buffers when finished (free(ancestor.ptr); free(ours.ptr); + free(theirs.ptr); free(result_buf.ptr);). 
+ +If the modifications do not merge cleanly, `ll_merge` will return a +nonzero value and `result_buf` will generally include a description of +the conflict bracketed by markers such as the traditional `<<<<<<<` +and `>>>>>>>`. + +The `ancestor_label`, `our_label`, and `their_label` parameters are +used to label the different sides of a conflict if the merge driver +supports this. + +The `flag` parameter is a bitfield: + + - The `LL_OPT_VIRTUAL_ANCESTOR` bit indicates whether this is an + internal merge to consolidate ancestors for a recursive merge. + + - The `LL_OPT_FAVOR_MASK` bits allow local conflicts to be automatically + resolved in favor of one side or the other (as in 'git merge-file' + `--ours`/`--theirs`/`--union`). + They can be populated by `create_ll_flag`, whose argument can be + `XDL_MERGE_FAVOR_OURS`, `XDL_MERGE_FAVOR_THEIRS`, or + `XDL_MERGE_FAVOR_UNION`. + +Everything else +--------------- + +Talk about <merge-recursive.h> and merge_file(): + + - merge_trees() to merge with rename detection + - merge_recursive() for ancestor consolidation + - try_merge_command() for other strategies + - conflict format + - merge options + +(Daniel, Miklos, Stephan, JC) diff --git a/Documentation/technical/api-parse-options.txt b/Documentation/technical/api-parse-options.txt index 312e3b2e2b..c5d141cd63 100644 --- a/Documentation/technical/api-parse-options.txt +++ b/Documentation/technical/api-parse-options.txt @@ -201,7 +201,7 @@ The last element of the array must be `OPT_END()`. If not stated otherwise, interpret the arguments as follows: * `short` is a character for the short option - (e.g. `\'e\'` for `-e`, use `0` to omit), + (e.g. `{apostrophe}e{apostrophe}` for `-e`, use `0` to omit), * `long` is a string for the long option (e.g. 
`"example"` for `\--example`, use `NULL` to omit), @@ -228,10 +228,10 @@ The function must be defined in this form: The callback mechanism is as follows: * Inside `func`, the only interesting member of the structure - given by `opt` is the void pointer `opt->value`. - `\*opt->value` will be the value that is saved into `var`, if you + given by `opt` is the void pointer `opt\->value`. + `\*opt\->value` will be the value that is saved into `var`, if you use `OPT_CALLBACK()`. - For example, do `*(unsigned long *)opt->value = 42;` to get 42 + For example, do `*(unsigned long *)opt\->value = 42;` to get 42 into an `unsigned long` variable. * Return value `0` indicates success and non-zero return diff --git a/Documentation/technical/api-tree-walking.txt b/Documentation/technical/api-tree-walking.txt index 55b728632c..14af37c3f1 100644 --- a/Documentation/technical/api-tree-walking.txt +++ b/Documentation/technical/api-tree-walking.txt @@ -42,6 +42,8 @@ information. * `data` can be anything the `fn` callback would want to use. +* `show_all_errors` tells whether to stop at the first error or not. 
+ Initializing ------------ diff --git a/Documentation/urls.txt b/Documentation/urls.txt index 1dcd1e7f1e..289019478d 100644 --- a/Documentation/urls.txt +++ b/Documentation/urls.txt @@ -25,7 +25,7 @@ The ssh and git protocols additionally support ~username expansion: - git://host.xz{startsb}:port{endsb}/~{startsb}user{endsb}/path/to/repo.git/ - {startsb}user@{endsb}host.xz:/~{startsb}user{endsb}/path/to/repo.git/ -For local respositories, also supported by git natively, the following +For local repositories, also supported by git natively, the following syntaxes may be used: - /path/to/repo.git/ diff --git a/Documentation/user-manual.txt b/Documentation/user-manual.txt index 22aee34d4a..fecc4eb5b3 100644 --- a/Documentation/user-manual.txt +++ b/Documentation/user-manual.txt @@ -4251,9 +4251,9 @@ Two things are interesting here: negative numbers in case of different errors--and 0 on success. - the variable `sha1` in the function signature of `get_sha1()` is `unsigned - char \*`, but is actually expected to be a pointer to `unsigned + char {asterisk}`, but is actually expected to be a pointer to `unsigned char[20]`. This variable will contain the 160-bit SHA-1 of the given - commit. Note that whenever a SHA-1 is passed as `unsigned char \*`, it + commit. Note that whenever a SHA-1 is passed as `unsigned char {asterisk}`, it is the binary representation, as opposed to the ASCII representation in hex characters, which is passed as `char *`. 
diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index e88f50cafb..f6d301a10f 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v1.7.2 +DEF_VER=v1.7.2.GIT LF=' ' @@ -157,3 +157,36 @@ Issues of note: It has been reported that docbook-xsl version 1.72 and 1.73 are buggy; 1.72 misformats manual pages for callouts, and 1.73 needs the patch in contrib/patches/docbook-xsl-manpages-charmap.patch + + Users attempting to build the documentation on Cygwin may need to ensure + that the /etc/xml/catalog file looks something like this: + + <?xml version="1.0"?> + <!DOCTYPE catalog PUBLIC + "-//OASIS//DTD Entity Resolution XML Catalog V1.0//EN" + "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd" + > + <catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog"> + <rewriteURI + uriStartString = "http://docbook.sourceforge.net/release/xsl/current" + rewritePrefix = "/usr/share/sgml/docbook/xsl-stylesheets" + /> + <rewriteURI + uriStartString="http://www.oasis-open.org/docbook/xml/4.5" + rewritePrefix="/usr/share/sgml/docbook/xml-dtd-4.5" + /> + </catalog> + + This can be achieved with the following two xmlcatalog commands: + + xmlcatalog --noout \ + --add rewriteURI \ + http://docbook.sourceforge.net/release/xsl/current \ + /usr/share/sgml/docbook/xsl-stylesheets \ + /etc/xml/catalog + + xmlcatalog --noout \ + --add rewriteURI \ + http://www.oasis-open.org/docbook/xml/4.5 \ + /usr/share/sgml/docbook/xml-dtd-4.5 \ + /etc/xml/catalog @@ -68,6 +68,8 @@ all:: # # Define NO_MKSTEMPS if you don't have mkstemps in the C library. # +# Define NO_STRTOK_R if you don't have strtok_r in the C library. +# # Define NO_LIBGEN_H if you don't have libgen.h. 
# # Define NEEDS_LIBGEN if your libgen needs -lgen when linking @@ -408,12 +410,17 @@ TEST_PROGRAMS_NEED_X += test-date TEST_PROGRAMS_NEED_X += test-delta TEST_PROGRAMS_NEED_X += test-dump-cache-tree TEST_PROGRAMS_NEED_X += test-genrandom +TEST_PROGRAMS_NEED_X += test-line-buffer TEST_PROGRAMS_NEED_X += test-match-trees +TEST_PROGRAMS_NEED_X += test-obj-pool TEST_PROGRAMS_NEED_X += test-parse-options TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-run-command TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sigchain +TEST_PROGRAMS_NEED_X += test-string-pool +TEST_PROGRAMS_NEED_X += test-svn-fe +TEST_PROGRAMS_NEED_X += test-treap TEST_PROGRAMS_NEED_X += test-index-version TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X)) @@ -468,6 +475,7 @@ export PYTHON_PATH LIB_FILE=libgit.a XDIFF_LIB=xdiff/lib.a +VCSSVN_LIB=vcs-svn/lib.a LIB_H += advice.h LIB_H += archive.h @@ -1035,6 +1043,7 @@ ifeq ($(uname_S),Windows) NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease + NO_STRTOK_R = YesPlease NO_MEMMEM = YesPlease # NEEDS_LIBICONV = YesPlease NO_ICONV = YesPlease @@ -1089,6 +1098,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease + NO_STRTOK_R = YesPlease NO_MEMMEM = YesPlease NEEDS_LIBICONV = YesPlease OLD_ICONV = YesPlease @@ -1319,6 +1329,10 @@ endif ifdef NO_STRTOULL COMPAT_CFLAGS += -DNO_STRTOULL endif +ifdef NO_STRTOK_R + COMPAT_CFLAGS += -DNO_STRTOK_R + COMPAT_OBJS += compat/strtok_r.o +endif ifdef NO_SETENV COMPAT_CFLAGS += -DNO_SETENV COMPAT_OBJS += compat/setenv.o @@ -1739,7 +1753,9 @@ ifndef NO_CURL endif XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ xdiff/xmerge.o xdiff/xpatience.o -OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) +VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \ + vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o +OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS) dep_files := 
$(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d) dep_dirs := $(addsuffix .depend,$(sort $(dir $(OBJECTS)))) @@ -1854,12 +1870,18 @@ builtin/prune.o builtin/reflog.o reachable.o: reachable.h builtin/commit.o builtin/revert.o wt-status.o: wt-status.h builtin/tar-tree.o archive-tar.o: tar.h builtin/pack-objects.o: thread-utils.h +connect.o transport.o http-backend.o: url.h http-fetch.o http-walker.o remote-curl.o transport.o walker.o: walker.h -http.o http-walker.o http-push.o remote-curl.o: http.h +http.o http-walker.o http-push.o http-fetch.o remote-curl.o: http.h xdiff-interface.o $(XDIFF_OBJS): \ xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \ xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h + +$(VCSSVN_OBJS): \ + vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \ + vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \ + vcs-svn/svndump.h endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ @@ -1872,12 +1894,16 @@ builtin/init-db.s builtin/init-db.o: EXTRA_CPPFLAGS = \ config.s config.o: EXTRA_CPPFLAGS = -DETC_GITCONFIG='"$(ETC_GITCONFIG_SQ)"' -http.s http.o: EXTRA_CPPFLAGS = -DGIT_USER_AGENT='"git/$(GIT_VERSION)"' +http.s http.o: EXTRA_CPPFLAGS = -DGIT_HTTP_USER_AGENT='"git/$(GIT_VERSION)"' ifdef NO_EXPAT http-walker.s http-walker.o: EXTRA_CPPFLAGS = -DNO_EXPAT endif +ifdef NO_REGEX +compat/regex/regex.o: EXTRA_CPPFLAGS = -DGAWK -DNO_MBSUPPORT +endif + git-%$X: %.o $(GITLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -1908,6 +1934,8 @@ $(LIB_FILE): $(LIB_OBJS) $(XDIFF_LIB): $(XDIFF_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS) +$(VCSSVN_LIB): $(VCSSVN_OBJS) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(VCSSVN_OBJS) doc: $(MAKE) -C Documentation all @@ -2006,12 +2034,18 @@ test-date$X: date.o ctype.o test-delta$X: diff-delta.o patch-delta.o +test-line-buffer$X: vcs-svn/lib.a + test-parse-options$X: parse-options.o +test-string-pool$X: vcs-svn/lib.a + +test-svn-fe$X: 
vcs-svn/lib.a + .PRECIOUS: $(TEST_OBJS) test-%$X: test-%.o $(GITLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS) check-sha1:: test-sha1$X ./test-sha1.sh @@ -2075,10 +2109,19 @@ endif bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \ execdir=$$(cd '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' && pwd) && \ { test "$$bindir/" = "$$execdir/" || \ - { $(RM) "$$execdir/git$X" && \ + for p in git$X $(filter $(install_bindir_programs),$(ALL_PROGRAMS)); do \ + $(RM) "$$execdir/$$p" && \ test -z "$(NO_CROSS_DIRECTORY_HARDLINKS)" && \ - ln "$$bindir/git$X" "$$execdir/git$X" 2>/dev/null || \ - cp "$$bindir/git$X" "$$execdir/git$X"; } ; } && \ + ln "$$bindir/$$p" "$$execdir/$$p" 2>/dev/null || \ + cp "$$bindir/$$p" "$$execdir/$$p" || exit; \ + done; \ + } && \ + for p in $(filter $(install_bindir_programs),$(BUILT_INS)); do \ + $(RM) "$$bindir/$$p" && \ + ln "$$bindir/git$X" "$$bindir/$$p" 2>/dev/null || \ + ln -s "git$X" "$$bindir/$$p" 2>/dev/null || \ + cp "$$bindir/git$X" "$$bindir/$$p" || exit; \ + done && \ for p in $(BUILT_INS); do \ $(RM) "$$execdir/$$p" && \ ln "$$execdir/git$X" "$$execdir/$$p" 2>/dev/null || \ @@ -2177,8 +2220,8 @@ distclean: clean $(RM) configure clean: - $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o \ - builtin/*.o $(LIB_FILE) $(XDIFF_LIB) + $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o vcs-svn/*.o \ + builtin/*.o $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB) $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X $(RM) $(TEST_PROGRAMS) $(RM) -r bin-wrappers @@ -2254,6 +2297,7 @@ check-docs:: documented,gitglossary | \ documented,githooks | \ documented,gitrepository-layout | \ + documented,gitrevisions | \ documented,gittutorial | \ documented,gittutorial-2 | \ documented,git-bisect-lk2009 | \ @@ -1 +1 @@ -Documentation/RelNotes-1.7.2.txt
\ No newline at end of file +Documentation/RelNotes-1.7.3.txt
\ No newline at end of file @@ -22,6 +22,13 @@ char *alias_lookup(const char *alias) return alias_val; } +#define SPLIT_CMDLINE_BAD_ENDING 1 +#define SPLIT_CMDLINE_UNCLOSED_QUOTE 2 +static const char *split_cmdline_errors[] = { + "cmdline ends with \\", + "unclosed quote" +}; + int split_cmdline(char *cmdline, const char ***argv) { int src, dst, count = 0, size = 16; @@ -53,7 +60,7 @@ int split_cmdline(char *cmdline, const char ***argv) if (!c) { free(*argv); *argv = NULL; - return error("cmdline ends with \\"); + return -SPLIT_CMDLINE_BAD_ENDING; } } cmdline[dst++] = c; @@ -66,7 +73,7 @@ int split_cmdline(char *cmdline, const char ***argv) if (quoted) { free(*argv); *argv = NULL; - return error("unclosed quote"); + return -SPLIT_CMDLINE_UNCLOSED_QUOTE; } ALLOC_GROW(*argv, count+1, size); @@ -75,3 +82,6 @@ int split_cmdline(char *cmdline, const char ***argv) return count; } +const char *split_cmdline_strerror(int split_cmdline_errno) { + return split_cmdline_errors[-split_cmdline_errno-1]; +} @@ -33,6 +33,7 @@ static void format_subst(const struct commit *commit, struct strbuf fmt = STRBUF_INIT; struct pretty_print_context ctx = {0}; ctx.date_mode = DATE_NORMAL; + ctx.abbrev = DEFAULT_ABBREV; if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -7,9 +7,9 @@ #define say1(a,b) fprintf(stderr, a, b) #define say2(a,b,c) fprintf(stderr, a, b, c) #else -#define say(a) do {} while(0) -#define say1(a,b) do {} while(0) -#define say2(a,b,c) do {} while(0) +#define say(a) do { /* nothing */ } while (0) +#define say1(a,b) do { /* nothing */ } while (0) +#define say2(a,b,c) do { /* nothing */ } while (0) #endif static const char en85[] = { @@ -141,7 +141,8 @@ static void show_list(const char *debug, int counted, int nr, enum object_type type; unsigned long size; char *buf = read_sha1_file(commit->object.sha1, &type, &size); - char *ep, *sp; + const char *subject_start; + int subject_len; fprintf(stderr, "%c%c%c ", (flags & TREESAME) ? 
' ' : 'T', @@ -156,13 +157,9 @@ static void show_list(const char *debug, int counted, int nr, fprintf(stderr, " %.*s", 8, sha1_to_hex(pp->item->object.sha1)); - sp = strstr(buf, "\n\n"); - if (sp) { - sp += 2; - for (ep = sp; *ep && *ep != '\n'; ep++) - ; - fprintf(stderr, " %.*s", (int)(ep - sp), sp); - } + subject_len = find_commit_subject(buf, &subject_start); + if (subject_len) + fprintf(stderr, " %.*s", subject_len, subject_start); fprintf(stderr, "\n"); } } @@ -159,7 +159,7 @@ void create_branch(const char *head, dont_change_ref = 1; else if (!force) die("A branch named '%s' already exists.", name); - else if (!is_bare_repository() && !strcmp(head, name)) + else if (!is_bare_repository() && head && !strcmp(head, name)) die("Cannot force update the current branch."); forcing = 1; } diff --git a/builtin/apply.c b/builtin/apply.c index 12ef9ea8af..23c18c573b 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -416,48 +416,190 @@ static char *squash_slash(char *name) return name; } -static char *find_name(const char *line, char *def, int p_value, int terminate) +static char *find_name_gnu(const char *line, char *def, int p_value) { - int len; - const char *start = NULL; + struct strbuf name = STRBUF_INIT; + char *cp; - if (p_value == 0) - start = line; + /* + * Proposed "new-style" GNU patch/diff format; see + * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2 + */ + if (unquote_c_style(&name, line, NULL)) { + strbuf_release(&name); + return NULL; + } - if (*line == '"') { - struct strbuf name = STRBUF_INIT; + for (cp = name.buf; p_value; p_value--) { + cp = strchr(cp, '/'); + if (!cp) { + strbuf_release(&name); + return NULL; + } + cp++; + } - /* - * Proposed "new-style" GNU patch/diff format; see - * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2 - */ - if (!unquote_c_style(&name, line, NULL)) { - char *cp; + /* name can later be freed, so we need + * to memmove, not just return cp + */ + strbuf_remove(&name, 0, cp - name.buf); + 
free(def); + if (root) + strbuf_insert(&name, 0, root, root_len); + return squash_slash(strbuf_detach(&name, NULL)); +} - for (cp = name.buf; p_value; p_value--) { - cp = strchr(cp, '/'); - if (!cp) - break; - cp++; - } - if (cp) { - /* name can later be freed, so we need - * to memmove, not just return cp - */ - strbuf_remove(&name, 0, cp - name.buf); - free(def); - if (root) - strbuf_insert(&name, 0, root, root_len); - return squash_slash(strbuf_detach(&name, NULL)); - } - } - strbuf_release(&name); +static size_t tz_len(const char *line, size_t len) +{ + const char *tz, *p; + + if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ') + return 0; + tz = line + len - strlen(" +0500"); + + if (tz[1] != '+' && tz[1] != '-') + return 0; + + for (p = tz + 2; p != line + len; p++) + if (!isdigit(*p)) + return 0; + + return line + len - tz; +} + +static size_t date_len(const char *line, size_t len) +{ + const char *date, *p; + + if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-') + return 0; + p = date = line + len - strlen("72-02-05"); + + if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || + !isdigit(*p++) || !isdigit(*p++)) /* Not a date. */ + return 0; + + if (date - line >= strlen("19") && + isdigit(date[-1]) && isdigit(date[-2])) /* 4-digit year */ + date -= strlen("19"); + + return line + len - date; +} + +static size_t short_time_len(const char *line, size_t len) +{ + const char *time, *p; + + if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':') + return 0; + p = time = line + len - strlen(" 07:01:32"); + + /* Permit 1-digit hours? */ + if (*p++ != ' ' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || + !isdigit(*p++) || !isdigit(*p++)) /* Not a time. 
*/ + return 0; + + return line + len - time; +} + +static size_t fractional_time_len(const char *line, size_t len) +{ + const char *p; + size_t n; + + /* Expected format: 19:41:17.620000023 */ + if (!len || !isdigit(line[len - 1])) + return 0; + p = line + len - 1; + + /* Fractional seconds. */ + while (p > line && isdigit(*p)) + p--; + if (*p != '.') + return 0; + + /* Hours, minutes, and whole seconds. */ + n = short_time_len(line, p - line); + if (!n) + return 0; + + return line + len - p + n; +} + +static size_t trailing_spaces_len(const char *line, size_t len) +{ + const char *p; + + /* Expected format: ' ' x (1 or more) */ + if (!len || line[len - 1] != ' ') + return 0; + + p = line + len; + while (p != line) { + p--; + if (*p != ' ') + return line + len - (p + 1); } - for (;;) { + /* All spaces! */ + return len; +} + +static size_t diff_timestamp_len(const char *line, size_t len) +{ + const char *end = line + len; + size_t n; + + /* + * Posix: 2010-07-05 19:41:17 + * GNU: 2010-07-05 19:41:17.620000023 -0500 + */ + + if (!isdigit(end[-1])) + return 0; + + n = tz_len(line, end - line); + end -= n; + + n = short_time_len(line, end - line); + if (!n) + n = fractional_time_len(line, end - line); + end -= n; + + n = date_len(line, end - line); + if (!n) /* No date. Too bad. */ + return 0; + end -= n; + + if (end == line) /* No space before date. */ + return 0; + if (end[-1] == '\t') { /* Success! */ + end--; + return line + len - end; + } + if (end[-1] != ' ') /* No space before date. */ + return 0; + + /* Whitespace damage. 
*/ + end -= trailing_spaces_len(line, end - line); + return line + len - end; +} + +static char *find_name_common(const char *line, char *def, int p_value, + const char *end, int terminate) +{ + int len; + const char *start = NULL; + + if (p_value == 0) + start = line; + while (line != end) { char c = *line; - if (isspace(c)) { + if (!end && isspace(c)) { if (c == '\n') break; if (name_terminate(start, line-start, c, terminate)) @@ -497,6 +639,37 @@ static char *find_name(const char *line, char *def, int p_value, int terminate) return squash_slash(xmemdupz(start, len)); } +static char *find_name(const char *line, char *def, int p_value, int terminate) +{ + if (*line == '"') { + char *name = find_name_gnu(line, def, p_value); + if (name) + return name; + } + + return find_name_common(line, def, p_value, NULL, terminate); +} + +static char *find_name_traditional(const char *line, char *def, int p_value) +{ + size_t len = strlen(line); + size_t date_len; + + if (*line == '"') { + char *name = find_name_gnu(line, def, p_value); + if (name) + return name; + } + + len = strchrnul(line, '\n') - line; + date_len = diff_timestamp_len(line, len); + if (!date_len) + return find_name_common(line, def, p_value, NULL, TERM_TAB); + len -= date_len; + + return find_name_common(line, def, p_value, line + len, 0); +} + static int count_slashes(const char *cp) { int cnt = 0; @@ -519,7 +692,7 @@ static int guess_p_value(const char *nameline) if (is_dev_null(nameline)) return -1; - name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB); + name = find_name_traditional(nameline, NULL, 0); if (!name) return -1; cp = strchr(name, '/'); @@ -638,16 +811,16 @@ static void parse_traditional_patch(const char *first, const char *second, struc if (is_dev_null(first)) { patch->is_new = 1; patch->is_delete = 0; - name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB); + name = find_name_traditional(second, NULL, p_value); patch->new_name = name; } else if (is_dev_null(second)) { 
patch->is_new = 0; patch->is_delete = 1; - name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB); + name = find_name_traditional(first, NULL, p_value); patch->old_name = name; } else { - name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB); - name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB); + name = find_name_traditional(first, NULL, p_value); + name = find_name_traditional(second, name, p_value); if (has_epoch_timestamp(first)) { patch->is_new = 1; patch->is_delete = 0; @@ -2979,8 +3152,7 @@ static void build_fake_ancestor(struct patch *list, const char *filename) else if (get_sha1(patch->old_sha1_prefix, sha1)) /* git diff has no index line for mode/type changes */ if (!patch->lines_added && !patch->lines_deleted) { - if (get_current_sha1(patch->new_name, sha1) || - get_current_sha1(patch->old_name, sha1)) + if (get_current_sha1(patch->old_name, sha1)) die("mode change for %s, which is not " "in current HEAD", name); sha1_ptr = sha1; @@ -3607,11 +3779,11 @@ static int option_parse_directory(const struct option *opt, return 0; } -int cmd_apply(int argc, const char **argv, const char *unused_prefix) +int cmd_apply(int argc, const char **argv, const char *prefix_) { int i; int errs = 0; - int is_not_gitdir; + int is_not_gitdir = !startup_info->have_repository; int binary; int force_apply = 0; @@ -3684,7 +3856,7 @@ int cmd_apply(int argc, const char **argv, const char *unused_prefix) OPT_END() }; - prefix = setup_git_directory_gently(&is_not_gitdir); + prefix = prefix_; prefix_length = prefix ? 
strlen(prefix) : 0; git_config(git_apply_config, NULL); if (apply_default_whitespace) diff --git a/builtin/blame.c b/builtin/blame.c index 01e62fdeb0..101535448f 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1407,7 +1407,8 @@ static void get_commit_info(struct commit *commit, int detailed) { int len; - char *tmp, *endp, *reencoded, *message; + const char *subject; + char *reencoded, *message; static char author_name[1024]; static char author_mail[1024]; static char committer_name[1024]; @@ -1449,22 +1450,13 @@ static void get_commit_info(struct commit *commit, &ret->committer_time, &ret->committer_tz); ret->summary = summary_buf; - tmp = strstr(message, "\n\n"); - if (!tmp) { - error_out: + len = find_commit_subject(message, &subject); + if (len && len < sizeof(summary_buf)) { + memcpy(summary_buf, subject, len); + summary_buf[len] = 0; + } else { sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1)); - free(reencoded); - return; } - tmp += 2; - endp = strchr(tmp, '\n'); - if (!endp) - endp = tmp + strlen(tmp); - len = endp - tmp; - if (len >= sizeof(summary_buf) || len == 0) - goto error_out; - memcpy(summary_buf, tmp, len); - summary_buf[len] = 0; free(reencoded); } @@ -2376,11 +2368,11 @@ parse_done: * * The remaining are: * - * (1) if dashdash_pos != 0, its either + * (1) if dashdash_pos != 0, it is either * "blame [revisions] -- <path>" or * "blame -- <path> <rev>" * - * (2) otherwise, its one of the two: + * (2) otherwise, it is one of the two: * "blame [revisions] <path>" * "blame <path> <rev>" * diff --git a/builtin/bundle.c b/builtin/bundle.c index 2006cc5cd5..80649ba0b2 100644 --- a/builtin/bundle.c +++ b/builtin/bundle.c @@ -18,7 +18,6 @@ static const char builtin_bundle_usage[] = int cmd_bundle(int argc, const char **argv, const char *prefix) { struct bundle_header header; - int nongit; const char *cmd, *bundle_file; int bundle_fd = -1; char buffer[PATH_MAX]; @@ -31,7 +30,6 @@ int cmd_bundle(int argc, const char **argv, const char 
*prefix) argc -= 2; argv += 2; - prefix = setup_git_directory_gently(&nongit); if (prefix && bundle_file[0] != '/') { snprintf(buffer, sizeof(buffer), "%s/%s", prefix, bundle_file); bundle_file = buffer; @@ -54,11 +52,11 @@ int cmd_bundle(int argc, const char **argv, const char *prefix) return !!list_bundle_refs(&header, argc, argv); } if (!strcmp(cmd, "create")) { - if (nongit) + if (!startup_info->have_repository) die("Need a repository to create a bundle."); return !!create_bundle(&header, bundle_file, argc, argv); } else if (!strcmp(cmd, "unbundle")) { - if (nongit) + if (!startup_info->have_repository) die("Need a repository to unbundle."); return !!unbundle(&header, bundle_fd) || list_bundle_refs(&header, argc, argv); diff --git a/builtin/check-ref-format.c b/builtin/check-ref-format.c index b106c65d80..ae3f28115a 100644 --- a/builtin/check-ref-format.c +++ b/builtin/check-ref-format.c @@ -33,28 +33,38 @@ static void collapse_slashes(char *dst, const char *src) *dst = '\0'; } +static int check_ref_format_branch(const char *arg) +{ + struct strbuf sb = STRBUF_INIT; + int nongit; + + setup_git_directory_gently(&nongit); + if (strbuf_check_branch_ref(&sb, arg)) + die("'%s' is not a valid branch name", arg); + printf("%s\n", sb.buf + 11); + return 0; +} + +static int check_ref_format_print(const char *arg) +{ + char *refname = xmalloc(strlen(arg) + 1); + + if (check_ref_format(arg)) + return 1; + collapse_slashes(refname, arg); + printf("%s\n", refname); + return 0; +} + int cmd_check_ref_format(int argc, const char **argv, const char *prefix) { if (argc == 2 && !strcmp(argv[1], "-h")) usage(builtin_check_ref_format_usage); - if (argc == 3 && !strcmp(argv[1], "--branch")) { - struct strbuf sb = STRBUF_INIT; - - if (strbuf_check_branch_ref(&sb, argv[2])) - die("'%s' is not a valid branch name", argv[2]); - printf("%s\n", sb.buf + 11); - exit(0); - } - if (argc == 3 && !strcmp(argv[1], "--print")) { - char *refname = xmalloc(strlen(argv[2]) + 1); - - if 
(check_ref_format(argv[2])) - exit(1); - collapse_slashes(refname, argv[2]); - printf("%s\n", refname); - exit(0); - } + if (argc == 3 && !strcmp(argv[1], "--branch")) + return check_ref_format_branch(argv[2]); + if (argc == 3 && !strcmp(argv[1], "--print")) + return check_ref_format_print(argv[2]); if (argc != 2) usage(builtin_check_ref_format_usage); return !!check_ref_format(argv[1]); diff --git a/builtin/checkout.c b/builtin/checkout.c index 1994be92c6..aae80c34cf 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -32,7 +32,11 @@ struct checkout_opts { int writeout_stage; int writeout_error; + /* not set by parse_options */ + int branch_exists; + const char *new_branch; + const char *new_branch_force; const char *new_orphan_branch; int new_branch_log; enum branch_track track; @@ -150,6 +154,10 @@ static int checkout_merged(int pos, struct checkout *state) read_mmblob(&ours, active_cache[pos+1]->sha1); read_mmblob(&theirs, active_cache[pos+2]->sha1); + /* + * NEEDSWORK: re-create conflicts from merges with + * merge.renormalize set, too + */ status = ll_merge(&result_buf, path, &ancestor, "base", &ours, "ours", &theirs, "theirs", 0); free(ancestor.ptr); @@ -279,7 +287,6 @@ static void show_local_changes(struct object *head) struct rev_info rev; /* I think we want full paths, even if we're in a subdirectory. 
*/ init_revisions(&rev, NULL); - rev.abbrev = 0; rev.diffopt.output_format |= DIFF_FORMAT_NAME_STATUS; if (diff_setup_done(&rev.diffopt) < 0) die("diff_setup_done failed"); @@ -373,7 +380,7 @@ static int merge_working_tree(struct checkout_opts *opts, topts.src_index = &the_index; topts.dst_index = &the_index; - topts.msgs.not_uptodate_file = "You have local changes to '%s'; cannot switch branches."; + setup_unpack_trees_porcelain(&topts, "checkout"); refresh_cache(REFRESH_QUIET); @@ -433,6 +440,13 @@ static int merge_working_tree(struct checkout_opts *opts, */ add_files_to_cache(NULL, NULL, 0); + /* + * NEEDSWORK: carrying over local changes + * when branches have different end-of-line + * normalization (or clean+smudge rules) is + * a pain; plumb in an option to set + * o.renormalize? + */ init_merge_options(&o); o.verbosity = 0; work = write_tree_from_memory(&o); @@ -511,7 +525,8 @@ static void update_refs_for_switch(struct checkout_opts *opts, } } else - create_branch(old->name, opts->new_branch, new->name, 0, + create_branch(old->name, opts->new_branch, new->name, + opts->new_branch_force ? 1 : 0, opts->new_branch_log, opts->track); new->name = opts->new_branch; setup_branch_path(new); @@ -529,9 +544,12 @@ static void update_refs_for_switch(struct checkout_opts *opts, if (old->path && !strcmp(new->path, old->path)) fprintf(stderr, "Already on '%s'\n", new->name); - else + else if (opts->new_branch) fprintf(stderr, "Switched to%s branch '%s'\n", - opts->new_branch ? " a new" : "", + opts->branch_exists ? 
" and reset" : " a new", + new->name); + else + fprintf(stderr, "Switched to branch '%s'\n", new->name); } if (old->path && old->name) { @@ -657,7 +675,10 @@ int cmd_checkout(int argc, const char **argv, const char *prefix) int dwim_new_local_branch = 1; struct option options[] = { OPT__QUIET(&opts.quiet), - OPT_STRING('b', NULL, &opts.new_branch, "new branch", "branch"), + OPT_STRING('b', NULL, &opts.new_branch, "branch", + "create and checkout a new branch"), + OPT_STRING('B', NULL, &opts.new_branch_force, "branch", + "create/reset and checkout a branch"), OPT_BOOLEAN('l', NULL, &opts.new_branch_log, "log for new branch"), OPT_SET_INT('t', "track", &opts.track, "track", BRANCH_TRACK_EXPLICIT), @@ -688,6 +709,14 @@ int cmd_checkout(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, options, checkout_usage, PARSE_OPT_KEEP_DASHDASH); + /* we can assume from now on new_branch = !new_branch_force */ + if (opts.new_branch && opts.new_branch_force) + die("-B cannot be used with -b"); + + /* copy -B over to -b, so that we can just check the latter */ + if (opts.new_branch_force) + opts.new_branch = opts.new_branch_force; + if (patch_mode && (opts.track > 0 || opts.new_branch || opts.new_branch_log || opts.merge || opts.force)) die ("--patch is incompatible with all other options"); @@ -709,7 +738,7 @@ int cmd_checkout(int argc, const char **argv, const char *prefix) if (opts.new_orphan_branch) { if (opts.new_branch) - die("--orphan and -b are mutually exclusive"); + die("--orphan and -b|-B are mutually exclusive"); if (opts.track > 0) die("--orphan cannot be used with -t"); opts.new_branch = opts.new_orphan_branch; @@ -858,8 +887,12 @@ no_reference: if (strbuf_check_branch_ref(&buf, opts.new_branch)) die("git checkout: we do not like '%s' as a branch name.", opts.new_branch); - if (!get_sha1(buf.buf, rev)) - die("git checkout: branch %s already exists", opts.new_branch); + if (!get_sha1(buf.buf, rev)) { + opts.branch_exists = 1; + 
if (!opts.new_branch_force) + die("git checkout: branch %s already exists", + opts.new_branch); + } strbuf_release(&buf); } diff --git a/builtin/clean.c b/builtin/clean.c index fac64e6cd3..b508d2cab4 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -10,12 +10,13 @@ #include "cache.h" #include "dir.h" #include "parse-options.h" +#include "string-list.h" #include "quote.h" static int force = -1; /* unset */ static const char *const builtin_clean_usage[] = { - "git clean [-d] [-f] [-n] [-q] [-x | -X] [--] <paths>...", + "git clean [-d] [-f] [-n] [-q] [-e <pattern>] [-x | -X] [--] <paths>...", NULL }; @@ -26,6 +27,13 @@ static int git_clean_config(const char *var, const char *value, void *cb) return git_default_config(var, value, cb); } +static int exclude_cb(const struct option *opt, const char *arg, int unset) +{ + struct string_list *exclude_list = opt->value; + string_list_append(exclude_list, arg); + return 0; +} + int cmd_clean(int argc, const char **argv, const char *prefix) { int i; @@ -36,6 +44,7 @@ int cmd_clean(int argc, const char **argv, const char *prefix) struct dir_struct dir; static const char **pathspec; struct strbuf buf = STRBUF_INIT; + struct string_list exclude_list = { NULL, 0, 0, 0 }; const char *qname; char *seen = NULL; struct option options[] = { @@ -44,6 +53,8 @@ int cmd_clean(int argc, const char **argv, const char *prefix) OPT_BOOLEAN('f', "force", &force, "force"), OPT_BOOLEAN('d', NULL, &remove_directories, "remove whole directories"), + { OPTION_CALLBACK, 'e', "exclude", &exclude_list, "pattern", + "exclude <pattern>", PARSE_OPT_NONEG, exclude_cb }, OPT_BOOLEAN('x', NULL, &ignored, "remove ignored files, too"), OPT_BOOLEAN('X', NULL, &ignored_only, "remove only ignored files"), @@ -81,6 +92,9 @@ int cmd_clean(int argc, const char **argv, const char *prefix) if (!ignored) setup_standard_excludes(&dir); + for (i = 0; i < exclude_list.nr; i++) + add_exclude(exclude_list.items[i].string, "", 0, dir.exclude_list); + pathspec = 
get_pathspec(prefix, argv); fill_directory(&dir, pathspec); @@ -167,5 +181,6 @@ int cmd_clean(int argc, const char **argv, const char *prefix) free(seen); strbuf_release(&directory); + string_list_clear(&exclude_list, 0); return (errors != 0); } diff --git a/builtin/clone.c b/builtin/clone.c index efb1e6faa5..19ed64041d 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -361,7 +361,7 @@ static void write_remote_refs(const struct ref *local_refs) int cmd_clone(int argc, const char **argv, const char *prefix) { - int is_bundle = 0; + int is_bundle = 0, is_local; struct stat buf; const char *repo_name, *repo, *work_tree, *git_dir; char *path, *dir; @@ -414,6 +414,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix) repo = xstrdup(make_absolute_path(repo_name)); else repo = repo_name; + is_local = path && !is_bundle; + if (is_local && option_depth) + warning("--depth is ignored in local clones; use file:// instead."); if (argc == 2) dir = xstrdup(argv[1]); @@ -514,7 +517,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) strbuf_reset(&value); - if (path && !is_bundle) { + if (is_local) { refs = clone_local(path, git_dir); mapped_refs = wanted_peer_refs(refs, refspec); } else { diff --git a/builtin/commit.c b/builtin/commit.c index a78dbd83bf..66fdd22024 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -25,6 +25,7 @@ #include "rerere.h" #include "unpack-trees.h" #include "quote.h" +#include "submodule.h" static const char * const builtin_commit_usage[] = { "git commit [options] [--] <filepattern>...", @@ -147,7 +148,7 @@ static struct option builtin_commit_options[] = { "terminate entries with NUL"), OPT_BOOLEAN(0, "amend", &amend, "amend previous commit"), OPT_BOOLEAN(0, "no-post-rewrite", &no_post_rewrite, "bypass post-rewrite hook"), - { OPTION_STRING, 'u', "untracked-files", &untracked_files_arg, "mode", "show untracked files, optional modes: all, normal, no. 
(Default: all)", PARSE_OPT_OPTARG, NULL, (intptr_t)"all" }, + { OPTION_STRING, 'u', "untracked-files", &untracked_files_arg, "mode", "show untracked files, optional modes: all, normal, no (Default: all)", PARSE_OPT_OPTARG, NULL, (intptr_t)"all" }, /* end commit contents options */ { OPTION_BOOLEAN, 0, "allow-empty", &allow_empty, NULL, @@ -1073,6 +1074,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) status_format = STATUS_FORMAT_PORCELAIN; wt_status_prepare(&s); + gitmodules_config(); git_config(git_status_config, &s); in_merge = file_exists(git_path("MERGE_HEAD")); argc = parse_options(argc, argv, prefix, @@ -1163,7 +1165,6 @@ static void print_summary(const char *prefix, const unsigned char *sha1) init_revisions(&rev, prefix); setup_revisions(0, NULL, &rev, NULL); - rev.abbrev = 0; rev.diff = 1; rev.diffopt.output_format = DIFF_FORMAT_SHORTSTAT | DIFF_FORMAT_SUMMARY; diff --git a/builtin/config.c b/builtin/config.c index f3d1660d02..ca4a0db4a7 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -20,7 +20,7 @@ static char delim = '='; static char key_delim = ' '; static char term = '\n'; -static int use_global_config, use_system_config; +static int use_global_config, use_system_config, use_local_config; static const char *given_config_file; static int actions, types; static const char *get_color_slot, *get_colorbool_slot; @@ -51,6 +51,7 @@ static struct option builtin_config_options[] = { OPT_GROUP("Config file location"), OPT_BOOLEAN(0, "global", &use_global_config, "use global config file"), OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"), + OPT_BOOLEAN(0, "local", &use_local_config, "use repository config file"), OPT_STRING('f', "file", &given_config_file, "FILE", "use given config file"), OPT_GROUP("Action"), OPT_BIT(0, "get", &actions, "get value: name [value-regex]", ACTION_GET), @@ -330,11 +331,10 @@ static int get_colorbool(int print) return get_colorbool_found ? 
0 : 1; } -int cmd_config(int argc, const char **argv, const char *unused_prefix) +int cmd_config(int argc, const char **argv, const char *prefix) { - int nongit; + int nongit = !startup_info->have_repository; char *value; - const char *prefix = setup_git_directory_gently(&nongit); config_exclusive_filename = getenv(CONFIG_ENVIRONMENT); @@ -342,7 +342,7 @@ int cmd_config(int argc, const char **argv, const char *unused_prefix) builtin_config_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (use_global_config + use_system_config + !!given_config_file > 1) { + if (use_global_config + use_system_config + use_local_config + !!given_config_file > 1) { error("only one config file at a time."); usage_with_options(builtin_config_usage, builtin_config_options); } @@ -358,6 +358,8 @@ int cmd_config(int argc, const char **argv, const char *unused_prefix) } else if (use_system_config) config_exclusive_filename = git_etc_gitconfig(); + else if (use_local_config) + config_exclusive_filename = git_pathdup("config"); else if (given_config_file) { if (!is_absolute_path(given_config_file) && prefix) config_exclusive_filename = prefix_filename(prefix, diff --git a/builtin/diff-files.c b/builtin/diff-files.c index 5b64011de8..951c7c8994 100644 --- a/builtin/diff-files.c +++ b/builtin/diff-files.c @@ -8,6 +8,7 @@ #include "commit.h" #include "revision.h" #include "builtin.h" +#include "submodule.h" static const char diff_files_usage[] = "git diff-files [-q] [-0/-1/2/3 |-c|--cc] [<common diff options>] [<path>...]" @@ -20,6 +21,7 @@ int cmd_diff_files(int argc, const char **argv, const char *prefix) unsigned options = 0; init_revisions(&rev, prefix); + gitmodules_config(); git_config(git_diff_basic_config, NULL); /* no "diff" UI options */ rev.abbrev = 0; diff --git a/builtin/diff-index.c b/builtin/diff-index.c index 04837494fe..2eb32bd9da 100644 --- a/builtin/diff-index.c +++ b/builtin/diff-index.c @@ -3,6 +3,7 @@ #include "commit.h" #include "revision.h" #include "builtin.h" +#include 
"submodule.h" static const char diff_cache_usage[] = "git diff-index [-m] [--cached] " @@ -17,6 +18,7 @@ int cmd_diff_index(int argc, const char **argv, const char *prefix) int result; init_revisions(&rev, prefix); + gitmodules_config(); git_config(git_diff_basic_config, NULL); /* no "diff" UI options */ rev.abbrev = 0; diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c index 3c78bda566..0d2a3e9fa2 100644 --- a/builtin/diff-tree.c +++ b/builtin/diff-tree.c @@ -3,6 +3,7 @@ #include "commit.h" #include "log-tree.h" #include "builtin.h" +#include "submodule.h" static struct rev_info log_tree_opt; @@ -112,6 +113,7 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix) int read_stdin = 0; init_revisions(opt, prefix); + gitmodules_config(); git_config(git_diff_basic_config, NULL); /* no "diff" UI options */ opt->abbrev = 0; opt->diff = 1; diff --git a/builtin/diff.c b/builtin/diff.c index 89ae89cde1..a43d326363 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -13,6 +13,7 @@ #include "revision.h" #include "log-tree.h" #include "builtin.h" +#include "submodule.h" struct blobinfo { unsigned char sha1[20]; @@ -279,6 +280,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) */ prefix = setup_git_directory_gently(&nongit); + gitmodules_config(); git_config(git_diff_ui_config, NULL); if (diff_use_color_default == -1) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 9fe25ff0b3..a9bbf8653d 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -27,6 +27,7 @@ static enum { ABORT, VERBATIM, WARN, STRIP } signed_tag_mode = ABORT; static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ABORT; static int fake_missing_tagger; static int no_data; +static int full_tree; static int parse_opt_signed_tag_mode(const struct option *opt, const char *arg, int unset) @@ -147,10 +148,39 @@ static void handle_object(const unsigned char *sha1) free(buf); } +static int depth_first(const void *a_, const void *b_) +{ + const struct 
diff_filepair *a = *((const struct diff_filepair **)a_); + const struct diff_filepair *b = *((const struct diff_filepair **)b_); + const char *name_a, *name_b; + int len_a, len_b, len; + int cmp; + + name_a = a->one ? a->one->path : a->two->path; + name_b = b->one ? b->one->path : b->two->path; + + len_a = strlen(name_a); + len_b = strlen(name_b); + len = (len_a < len_b) ? len_a : len_b; + + /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */ + cmp = memcmp(name_a, name_b, len); + if (cmp) + return cmp; + return (len_b - len_a); +} + static void show_filemodify(struct diff_queue_struct *q, struct diff_options *options, void *data) { int i; + + /* + * Handle files below a directory first, in case they are all deleted + * and the directory changes to a file or symlink. + */ + qsort(q->queue, q->nr, sizeof(q->queue[0]), depth_first); + for (i = 0; i < q->nr; i++) { struct diff_filespec *ospec = q->queue[i]->one; struct diff_filespec *spec = q->queue[i]->two; @@ -241,7 +271,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) message += 2; if (commit->parents && - get_object_mark(&commit->parents->item->object) != 0) { + get_object_mark(&commit->parents->item->object) != 0 && + !full_tree) { parse_commit(commit->parents->item); diff_tree_sha1(commit->parents->item->tree->object.sha1, commit->tree->object.sha1, "", &rev->diffopt); @@ -281,6 +312,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) i++; } + if (full_tree) + printf("deleteall\n"); log_tree_diff_flush(rev); rev->diffopt.output_format = saved_output_format; @@ -565,8 +598,8 @@ static void import_marks(char *input_file) int cmd_fast_export(int argc, const char **argv, const char *prefix) { struct rev_info revs; - struct object_array commits = { 0, 0, NULL }; - struct string_list extra_refs = { NULL, 0, 0, 0 }; + struct object_array commits = OBJECT_ARRAY_INIT; + struct string_list extra_refs = STRING_LIST_INIT_NODUP; struct commit *commit; char 
*export_filename = NULL, *import_filename = NULL; struct option options[] = { @@ -584,6 +617,8 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) "Import marks from this file"), OPT_BOOLEAN(0, "fake-missing-tagger", &fake_missing_tagger, "Fake a tagger when tags lack one"), + OPT_BOOLEAN(0, "full-tree", &full_tree, + "Output full tree for each commit"), { OPTION_NEGBIT, 0, "data", &no_data, NULL, "Skip output of blob data", PARSE_OPT_NOARG | PARSE_OPT_NEGHELP, NULL, 1 }, @@ -608,6 +643,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) if (import_filename) import_marks(import_filename); + if (import_filename && revs.prune_data) + full_tree = 1; + get_tags_and_duplicates(&revs.pending, &extra_refs); if (prepare_revision_walk(&revs)) diff --git a/builtin/fetch.c b/builtin/fetch.c index 6eb1dfea09..fab3fce512 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -544,40 +544,14 @@ static int will_fetch(struct ref **head, const unsigned char *sha1) return 0; } -struct tag_data { - struct ref **head; - struct ref ***tail; -}; - -static int add_to_tail(struct string_list_item *item, void *cb_data) -{ - struct tag_data *data = (struct tag_data *)cb_data; - struct ref *rm = NULL; - - /* We have already decided to ignore this item */ - if (!item->util) - return 0; - - rm = alloc_ref(item->string); - rm->peer_ref = alloc_ref(item->string); - hashcpy(rm->old_sha1, item->util); - - **data->tail = rm; - *data->tail = &rm->next; - - return 0; -} - static void find_non_local_tags(struct transport *transport, struct ref **head, struct ref ***tail) { - struct string_list existing_refs = { NULL, 0, 0, 0 }; - struct string_list remote_refs = { NULL, 0, 0, 0 }; - struct tag_data data; + struct string_list existing_refs = STRING_LIST_INIT_NODUP; + struct string_list remote_refs = STRING_LIST_INIT_NODUP; const struct ref *ref; struct string_list_item *item = NULL; - data.head = head; data.tail = tail; for_each_ref(add_existing, 
&existing_refs); for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) { @@ -631,10 +605,20 @@ static void find_non_local_tags(struct transport *transport, item->util = NULL; /* - * For all the tags in the remote_refs string list, call - * add_to_tail to add them to the list of refs to be fetched + * For all the tags in the remote_refs string list, + * add them to the list of refs to be fetched */ - for_each_string_list(&remote_refs, add_to_tail, &data); + for_each_string_list_item(item, &remote_refs) { + /* Unless we have already decided to ignore this item... */ + if (item->util) + { + struct ref *rm = alloc_ref(item->string); + rm->peer_ref = alloc_ref(item->string); + hashcpy(rm->old_sha1, item->util); + **tail = rm; + *tail = &rm->next; + } + } string_list_clear(&remote_refs, 0); } @@ -667,7 +651,7 @@ static int truncate_fetch_head(void) static int do_fetch(struct transport *transport, struct refspec *refs, int ref_count) { - struct string_list existing_refs = { NULL, 0, 0, 0 }; + struct string_list existing_refs = STRING_LIST_INIT_NODUP; struct string_list_item *peer_item = NULL; struct ref *ref_map; struct ref *rm; @@ -675,10 +659,12 @@ static int do_fetch(struct transport *transport, for_each_ref(add_existing, &existing_refs); - if (transport->remote->fetch_tags == 2 && tags != TAGS_UNSET) - tags = TAGS_SET; - if (transport->remote->fetch_tags == -1) - tags = TAGS_UNSET; + if (tags == TAGS_DEFAULT) { + if (transport->remote->fetch_tags == 2) + tags = TAGS_SET; + if (transport->remote->fetch_tags == -1) + tags = TAGS_UNSET; + } if (!transport->get_refs_list || !transport->fetch) die("Don't know how to fetch from %s", transport->url); @@ -845,7 +831,8 @@ static int fetch_one(struct remote *remote, int argc, const char **argv) int exit_code; if (!remote) - die("Where do you want to fetch from today?"); + die("No remote repository specified. 
Please, specify either a URL or a\n" + "remote name from which new revisions should be fetched."); transport = transport_get(remote, NULL); transport_set_verbosity(transport, verbosity, progress); @@ -890,7 +877,7 @@ static int fetch_one(struct remote *remote, int argc, const char **argv) int cmd_fetch(int argc, const char **argv, const char *prefix) { int i; - struct string_list list = { NULL, 0, 0, 0 }; + struct string_list list = STRING_LIST_INIT_NODUP; struct remote *remote; int result = 0; diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c index bc3c5e6d3e..e7e12eea25 100644 --- a/builtin/fmt-merge-msg.c +++ b/builtin/fmt-merge-msg.c @@ -7,7 +7,7 @@ #include "string-list.h" static const char * const fmt_merge_msg_usage[] = { - "git fmt-merge-msg [--log|--no-log] [--file <file>]", + "git fmt-merge-msg [-m <message>] [--log|--no-log] [--file <file>]", NULL }; @@ -38,8 +38,8 @@ void init_src_data(struct src_data *data) data->generic.strdup_strings = 1; } -static struct string_list srcs = { NULL, 0, 0, 1 }; -static struct string_list origins = { NULL, 0, 0, 1 }; +static struct string_list srcs = STRING_LIST_INIT_DUP; +static struct string_list origins = STRING_LIST_INIT_DUP; static int handle_line(char *line) { @@ -146,7 +146,7 @@ static void shortlog(const char *name, unsigned char *sha1, int i, count = 0; struct commit *commit; struct object *branch; - struct string_list subjects = { NULL, 0, 0, 1 }; + struct string_list subjects = STRING_LIST_INIT_DUP; int flags = UNINTERESTING | TREESAME | SEEN | SHOWN | ADDED; struct strbuf sb = STRBUF_INIT; @@ -319,11 +319,14 @@ int fmt_merge_msg_shortlog(struct strbuf *in, struct strbuf *out) { int cmd_fmt_merge_msg(int argc, const char **argv, const char *prefix) { const char *inpath = NULL; + const char *message = NULL; struct option options[] = { OPT_BOOLEAN(0, "log", &merge_summary, "populate log with the shortlog"), { OPTION_BOOLEAN, 0, "summary", &merge_summary, NULL, "alias for --log (deprecated)", 
PARSE_OPT_NOARG | PARSE_OPT_HIDDEN }, + OPT_STRING('m', "message", &message, "text", + "use <text> as start of message"), OPT_FILENAME('F', "file", &inpath, "file to read from"), OPT_END() }; @@ -337,6 +340,12 @@ int cmd_fmt_merge_msg(int argc, const char **argv, const char *prefix) 0); if (argc > 0) usage_with_options(fmt_merge_msg_usage, options); + if (message && !merge_summary) { + char nl = '\n'; + write_in_full(STDOUT_FILENO, message, strlen(message)); + write_in_full(STDOUT_FILENO, &nl, 1); + return 0; + } if (inpath && strcmp(inpath, "-")) { in = fopen(inpath, "r"); @@ -346,7 +355,12 @@ int cmd_fmt_merge_msg(int argc, const char **argv, const char *prefix) if (strbuf_read(&input, fileno(in), 0) < 0) die_errno("could not read input file"); - ret = fmt_merge_msg(merge_summary, &input, &output); + if (message) { + strbuf_addstr(&output, message); + ret = fmt_merge_msg_shortlog(&input, &output); + } else { + ret = fmt_merge_msg(merge_summary, &input, &output); + } if (ret) return ret; write_in_full(STDOUT_FILENO, output.buf, output.len); diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c index a2b28c6962..89e75c6894 100644 --- a/builtin/for-each-ref.c +++ b/builtin/for-each-ref.c @@ -228,7 +228,8 @@ static void grab_common_values(struct atom_value *val, int deref, struct object v->s = s; } else if (!strcmp(name, "objectname:short")) { - v->s = find_unique_abbrev(obj->sha1, DEFAULT_ABBREV); + v->s = xstrdup(find_unique_abbrev(obj->sha1, + DEFAULT_ABBREV)); } } } diff --git a/builtin/grep.c b/builtin/grep.c index 597f76bc42..da32f3df34 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -834,12 +834,12 @@ int cmd_grep(int argc, const char **argv, const char *prefix) int external_grep_allowed__ignored; const char *show_in_pager = NULL, *default_pager = "dummy"; struct grep_opt opt; - struct object_array list = { 0, 0, NULL }; + struct object_array list = OBJECT_ARRAY_INIT; const char **paths = NULL; - struct string_list path_list = { NULL, 0, 0, 0 }; + 
struct string_list path_list = STRING_LIST_INIT_NODUP; int i; int dummy; - int nongit = 0, use_index = 1; + int use_index = 1; struct option options[] = { OPT_BOOLEAN(0, "cached", &cached, "search in index instead of in the work tree"), @@ -930,8 +930,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) OPT_END() }; - prefix = setup_git_directory_gently(&nongit); - /* * 'git grep -h', unlike 'git grep -h <pattern>', is a request * to show usage information and exit. @@ -976,7 +974,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) PARSE_OPT_STOP_AT_NON_OPTION | PARSE_OPT_NO_INTERNAL_HELP); - if (use_index && nongit) + if (use_index && !startup_info->have_repository) /* die the same way as if we did it at the beginning */ setup_git_directory(); diff --git a/builtin/help.c b/builtin/help.c index a9836b00ae..61ff79839b 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -120,7 +120,7 @@ static void exec_woman_emacs(const char *path, const char *page) if (!path) path = "emacsclient"; strbuf_addf(&man_page, "(woman \"%s\")", page); - execlp(path, "emacsclient", "-e", man_page.buf, NULL); + execlp(path, "emacsclient", "-e", man_page.buf, (char *)NULL); warning("failed to exec '%s': %s", path, strerror(errno)); } } @@ -148,7 +148,7 @@ static void exec_man_konqueror(const char *path, const char *page) } else path = "kfmclient"; strbuf_addf(&man_page, "man:%s(1)", page); - execlp(path, filename, "newTab", man_page.buf, NULL); + execlp(path, filename, "newTab", man_page.buf, (char *)NULL); warning("failed to exec '%s': %s", path, strerror(errno)); } } @@ -157,7 +157,7 @@ static void exec_man_man(const char *path, const char *page) { if (!path) path = "man"; - execlp(path, "man", page, NULL); + execlp(path, "man", page, (char *)NULL); warning("failed to exec '%s': %s", path, strerror(errno)); } @@ -165,7 +165,7 @@ static void exec_man_cmd(const char *cmd, const char *page) { struct strbuf shell_cmd = STRBUF_INIT; strbuf_addf(&shell_cmd, "%s %s", 
cmd, page); - execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL); + execl("/bin/sh", "sh", "-c", shell_cmd.buf, (char *)NULL); warning("failed to exec '%s': %s", cmd, strerror(errno)); } @@ -372,7 +372,7 @@ static void show_info_page(const char *git_cmd) { const char *page = cmd_to_page(git_cmd); setenv("INFOPATH", system_path(GIT_INFO_PATH), 1); - execlp("info", "info", "gitman", page, NULL); + execlp("info", "info", "gitman", page, (char *)NULL); die("no info viewer handled the request"); } @@ -398,7 +398,7 @@ static void get_html_page_path(struct strbuf *page_path, const char *page) #ifndef open_html static void open_html(const char *path) { - execl_git_cmd("web--browse", "-c", "help.browser", path, NULL); + execl_git_cmd("web--browse", "-c", "help.browser", path, (char *)NULL); } #endif diff --git a/builtin/index-pack.c b/builtin/index-pack.c index a89ae831dd..2e680d7a7a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -884,25 +884,11 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (argc == 2 && !strcmp(argv[1], "-h")) usage(index_pack_usage); - /* - * We wish to read the repository's config file if any, and - * for that it is necessary to call setup_git_directory_gently(). - * However if the cwd was inside .git/objects/pack/ then we need - * to go back there or all the pack name arguments will be wrong. - * And in that case we cannot rely on any prefix returned by - * setup_git_directory_gently() either. 
- */ - { - char cwd[PATH_MAX+1]; - int nongit; - - if (!getcwd(cwd, sizeof(cwd)-1)) - die("Unable to get current working directory"); - setup_git_directory_gently(&nongit); - git_config(git_index_pack_config, NULL); - if (chdir(cwd)) - die("Cannot come back to cwd"); - } + read_replace_refs = 0; + + git_config(git_index_pack_config, NULL); + if (prefix && chdir(prefix)) + die("Cannot come back to cwd"); for (i = 1; i < argc; i++) { const char *arg = argv[i]; diff --git a/builtin/log.c b/builtin/log.c index 08b872263c..eaa1ee0fa7 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -125,6 +125,7 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix, rev->show_decorations = 1; load_ref_decorations(decoration_style); } + setup_pager(); } /* @@ -491,12 +492,6 @@ int cmd_log_reflog(int argc, const char **argv, const char *prefix) rev.use_terminator = 1; rev.always_show_header = 1; - /* - * We get called through "git reflog", so unlike the other log - * routines, we need to set up our pager manually.. 
- */ - setup_pager(); - return cmd_log_walk(&rev); } diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 1b9b8a8b4a..bb4f612b3d 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -25,6 +25,7 @@ static int show_modified; static int show_killed; static int show_valid_bit; static int line_terminator = '\n'; +static int debug_mode; static const char *prefix; static int max_prefix_len; @@ -162,35 +163,41 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce) ce_stage(ce)); } write_name(ce->name, ce_namelen(ce)); -} - -static int show_one_ru(struct string_list_item *item, void *cbdata) -{ - const char *path = item->string; - struct resolve_undo_info *ui = item->util; - int i, len; - - len = strlen(path); - if (len < max_prefix_len) - return 0; /* outside of the prefix */ - if (!match_pathspec(pathspec, path, len, max_prefix_len, ps_matched)) - return 0; /* uninterested */ - for (i = 0; i < 3; i++) { - if (!ui->mode[i]) - continue; - printf("%s%06o %s %d\t", tag_resolve_undo, ui->mode[i], - find_unique_abbrev(ui->sha1[i], abbrev), - i + 1); - write_name(path, len); + if (debug_mode) { + printf(" ctime: %d:%d\n", ce->ce_ctime.sec, ce->ce_ctime.nsec); + printf(" mtime: %d:%d\n", ce->ce_mtime.sec, ce->ce_mtime.nsec); + printf(" dev: %d\tino: %d\n", ce->ce_dev, ce->ce_ino); + printf(" uid: %d\tgid: %d\n", ce->ce_uid, ce->ce_gid); + printf(" size: %d\tflags: %x\n", ce->ce_size, ce->ce_flags); } - return 0; } static void show_ru_info(void) { + struct string_list_item *item; + if (!the_index.resolve_undo) return; - for_each_string_list(the_index.resolve_undo, show_one_ru, NULL); + + for_each_string_list_item(item, the_index.resolve_undo) { + const char *path = item->string; + struct resolve_undo_info *ui = item->util; + int i, len; + + len = strlen(path); + if (len < max_prefix_len) + continue; /* outside of the prefix */ + if (!match_pathspec(pathspec, path, len, max_prefix_len, ps_matched)) + continue; /* uninterested */ + for (i = 0; i < 3; i++) 
{ + if (!ui->mode[i]) + continue; + printf("%s%06o %s %d\t", tag_resolve_undo, ui->mode[i], + find_unique_abbrev(ui->sha1[i], abbrev), + i + 1); + write_name(path, len); + } + } } static void show_files(struct dir_struct *dir) @@ -519,6 +526,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix) OPT_STRING(0, "with-tree", &with_tree, "tree-ish", "pretend that paths removed since <tree-ish> are still present"), OPT__ABBREV(&abbrev), + OPT_BOOLEAN(0, "debug", &debug_mode, "show debugging data"), OPT_END() }; diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index 34480cfad6..97eed4012b 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -32,7 +32,6 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix) { int i; const char *dest = NULL; - int nongit; unsigned flags = 0; int quiet = 0; const char *uploadpack = NULL; @@ -42,8 +41,6 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix) struct transport *transport; const struct ref *ref; - setup_git_directory_gently(&nongit); - for (i = 1; i < argc; i++) { const char *arg = argv[i]; diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c index e4560da191..99654d0222 100644 --- a/builtin/mailsplit.c +++ b/builtin/mailsplit.c @@ -137,7 +137,7 @@ static int split_maildir(const char *maildir, const char *dir, char name[PATH_MAX]; int ret = -1; int i; - struct string_list list = {NULL, 0, 0, 1}; + struct string_list list = STRING_LIST_INIT_DUP; if (populate_maildir_list(&list, maildir) < 0) goto out; diff --git a/builtin/merge-base.c b/builtin/merge-base.c index 54e7ec2237..96dd160731 100644 --- a/builtin/merge-base.c +++ b/builtin/merge-base.c @@ -23,7 +23,8 @@ static int show_merge_base(struct commit **rev, int rev_nr, int show_all) } static const char * const merge_base_usage[] = { - "git merge-base [-a|--all] <commit> <commit>...", + "git merge-base [-a|--all] [--octopus] <commit> <commit>...", + "git merge-base --independent <commit>...", NULL }; @@ -41,21 
+42,58 @@ static struct commit *get_commit_reference(const char *arg) return r; } +static int handle_octopus(int count, const char **args, int reduce, int show_all) +{ + struct commit_list *revs = NULL; + struct commit_list *result; + int i; + + if (reduce) + show_all = 1; + + for (i = count - 1; i >= 0; i--) + commit_list_insert(get_commit_reference(args[i]), &revs); + + result = reduce ? reduce_heads(revs) : get_octopus_merge_bases(revs); + + if (!result) + return 1; + + while (result) { + printf("%s\n", sha1_to_hex(result->item->object.sha1)); + if (!show_all) + return 0; + result = result->next; + } + + return 0; +} + int cmd_merge_base(int argc, const char **argv, const char *prefix) { struct commit **rev; int rev_nr = 0; int show_all = 0; + int octopus = 0; + int reduce = 0; struct option options[] = { - OPT_BOOLEAN('a', "all", &show_all, "outputs all common ancestors"), + OPT_BOOLEAN('a', "all", &show_all, "output all common ancestors"), + OPT_BOOLEAN(0, "octopus", &octopus, "find ancestors for a single n-way merge"), + OPT_BOOLEAN(0, "independent", &reduce, "list revs not reachable from others"), OPT_END() }; git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, merge_base_usage, 0); - if (argc < 2) + if (!octopus && !reduce && argc < 2) usage_with_options(merge_base_usage, options); + if (reduce && (show_all || octopus)) + die("--independent cannot be used with other options"); + + if (octopus || reduce) + return handle_octopus(argc, argv, reduce, show_all); + rev = xmalloc(argc * sizeof(*rev)); while (argc-- > 0) rev[rev_nr++] = get_commit_reference(*argv++); diff --git a/builtin/merge-file.c b/builtin/merge-file.c index b8e9e5ba01..b6664d49be 100644 --- a/builtin/merge-file.c +++ b/builtin/merge-file.c @@ -28,7 +28,6 @@ int cmd_merge_file(int argc, const char **argv, const char *prefix) xmparam_t xmp = {{0}}; int ret = 0, i = 0, to_stdout = 0; int quiet = 0; - int nongit; struct option options[] = { OPT_BOOLEAN('p', 
"stdout", &to_stdout, "send results to standard output"), OPT_SET_INT(0, "diff3", &xmp.style, "use a diff3 based merge", XDL_MERGE_DIFF3), @@ -50,8 +49,7 @@ int cmd_merge_file(int argc, const char **argv, const char *prefix) xmp.style = 0; xmp.favor = 0; - prefix = setup_git_directory_gently(&nongit); - if (!nongit) { + if (startup_info->have_repository) { /* Read the configuration file */ git_config(git_xmerge_config, NULL); if (0 <= git_xmerge_style) diff --git a/builtin/merge-recursive.c b/builtin/merge-recursive.c index d8875d5892..78b9db76a0 100644 --- a/builtin/merge-recursive.c +++ b/builtin/merge-recursive.c @@ -3,6 +3,9 @@ #include "tag.h" #include "merge-recursive.h" +static const char builtin_merge_recursive_usage[] = + "git %s <base>... -- <head> <remote> ..."; + static const char *better_branch_name(const char *branch) { static char githead_env[8 + 40 + 1]; @@ -29,7 +32,7 @@ int cmd_merge_recursive(int argc, const char **argv, const char *prefix) o.subtree_shift = ""; if (argc < 4) - usagef("%s <base>... 
-- <head> <remote> ...", argv[0]); + usagef(builtin_merge_recursive_usage, argv[0]); for (i = 1; i < argc; ++i) { const char *arg = argv[i]; @@ -45,6 +48,10 @@ int cmd_merge_recursive(int argc, const char **argv, const char *prefix) o.subtree_shift = ""; else if (!prefixcmp(arg+2, "subtree=")) o.subtree_shift = arg + 10; + else if (!strcmp(arg+2, "renormalize")) + o.renormalize = 1; + else if (!strcmp(arg+2, "no-renormalize")) + o.renormalize = 0; else die("Unknown option %s", arg); continue; diff --git a/builtin/merge.c b/builtin/merge.c index 37ce4f589f..5f65c0c8a6 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -54,6 +54,7 @@ static size_t use_strategies_nr, use_strategies_alloc; static const char **xopts; static size_t xopts_nr, xopts_alloc; static const char *branch; +static int option_renormalize; static int verbosity; static int allow_rerere_auto; @@ -131,6 +132,7 @@ static struct strategy *get_strategy(const char *name) ret = xcalloc(1, sizeof(struct strategy)); ret->name = xstrdup(name); + ret->attr = NO_TRIVIAL; return ret; } @@ -437,7 +439,7 @@ static void merge_name(const char *remote, struct strbuf *msg) strbuf_addstr(&truname, "refs/heads/"); strbuf_addstr(&truname, remote); strbuf_setlen(&truname, truname.len - len); - if (resolve_ref(truname.buf, buf_sha, 0, NULL)) { + if (resolve_ref(truname.buf, buf_sha, 1, NULL)) { strbuf_addf(msg, "%s\t\tbranch '%s'%s of .\n", sha1_to_hex(remote_head->sha1), @@ -486,7 +488,8 @@ static int git_merge_config(const char *k, const char *v, void *cb) buf = xstrdup(v); argc = split_cmdline(buf, &argv); if (argc < 0) - die("Bad branch.%s.mergeoptions string", branch); + die("Bad branch.%s.mergeoptions string: %s", branch, + split_cmdline_strerror(argc)); argv = xrealloc(argv, sizeof(*argv) * (argc + 2)); memmove(argv + 1, argv, sizeof(*argv) * (argc + 1)); argc++; @@ -503,6 +506,8 @@ static int git_merge_config(const char *k, const char *v, void *cb) return git_config_string(&pull_octopus, k, v); else if 
(!strcmp(k, "merge.log") || !strcmp(k, "merge.summary")) option_log = git_config_bool(k, v); + else if (!strcmp(k, "merge.renormalize")) + option_renormalize = git_config_bool(k, v); return git_diff_ui_config(k, v, cb); } @@ -624,6 +629,11 @@ static int try_merge_strategy(const char *strategy, struct commit_list *common, if (!strcmp(strategy, "subtree")) o.subtree_shift = ""; + o.renormalize = option_renormalize; + + /* + * NEEDSWORK: merge with table in builtin/merge-recursive + */ for (x = 0; x < xopts_nr; x++) { if (!strcmp(xopts[x], "ours")) o.recursive_variant = MERGE_RECURSIVE_OURS; @@ -633,6 +643,10 @@ static int try_merge_strategy(const char *strategy, struct commit_list *common, o.subtree_shift = ""; else if (!prefixcmp(xopts[x], "subtree=")) o.subtree_shift = xopts[x]+8; + else if (!strcmp(xopts[x], "renormalize")) + o.renormalize = 1; + else if (!strcmp(xopts[x], "no-renormalize")) + o.renormalize = 0; else die("Unknown option for merge-recursive: -X%s", xopts[x]); } @@ -704,7 +718,7 @@ int checkout_fast_forward(const unsigned char *head, const unsigned char *remote opts.verbose_update = 1; opts.merge = 1; opts.fn = twoway_merge; - opts.msgs = get_porcelain_error_msgs(); + setup_unpack_trees_porcelain(&opts, "merge"); trees[nr_trees] = parse_tree_indirect(head); if (!trees[nr_trees++]) @@ -816,7 +830,7 @@ static int finish_automerge(struct commit_list *common, return 0; } -static int suggest_conflicts(void) +static int suggest_conflicts(int renormalizing) { FILE *fp; int pos; @@ -1301,5 +1315,5 @@ int cmd_merge(int argc, const char **argv, const char *prefix) "stopped before committing as requested\n"); return 0; } else - return suggest_conflicts(); + return suggest_conflicts(option_renormalize); } diff --git a/builtin/mv.c b/builtin/mv.c index 38574b89f7..cdbb09473c 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -63,7 +63,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) const char **source, **destination, **dest_path; enum update_mode { 
BOTH = 0, WORKING_DIRECTORY, INDEX } *modes; struct stat st; - struct string_list src_for_dst = {NULL, 0, 0, 0}; + struct string_list src_for_dst = STRING_LIST_INIT_NODUP; git_config(git_default_config, NULL); diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 06a38ac8c1..31f5c1c971 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -220,7 +220,7 @@ static void name_rev_line(char *p, struct name_ref_data *data) int cmd_name_rev(int argc, const char **argv, const char *prefix) { - struct object_array revs = { 0, 0, NULL }; + struct object_array revs = OBJECT_ARRAY_INIT; int all = 0, transform_stdin = 0, allow_undefined = 1, always = 0; struct name_ref_data data = { 0, 0, NULL }; struct option opts[] = { diff --git a/builtin/notes.c b/builtin/notes.c index 190005f3cd..fbc347c9f0 100644 --- a/builtin/notes.c +++ b/builtin/notes.c @@ -798,8 +798,9 @@ static int prune(int argc, const char **argv, const char *prefix) struct notes_tree *t; int show_only = 0, verbose = 0; struct option options[] = { - OPT_BOOLEAN('n', NULL, &show_only, "do not remove, show only"), - OPT_BOOLEAN('v', NULL, &verbose, "report pruned notes"), + OPT_BOOLEAN('n', "dry-run", &show_only, + "do not remove, show only"), + OPT_BOOLEAN('v', "verbose", &verbose, "report pruned notes"), OPT_END() }; diff --git a/builtin/prune.c b/builtin/prune.c index 81f915ec31..99218ba49e 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -125,10 +125,9 @@ int cmd_prune(int argc, const char **argv, const char *prefix) { struct rev_info revs; const struct option options[] = { - OPT_BOOLEAN('n', NULL, &show_only, + OPT_BOOLEAN('n', "dry-run", &show_only, "do not remove, show only"), - OPT_BOOLEAN('v', NULL, &verbose, - "report pruned objects"), + OPT_BOOLEAN('v', "verbose", &verbose, "report pruned objects"), OPT_DATE(0, "expire", &expire, "expire objects older than <time>"), OPT_END() diff --git a/builtin/push.c b/builtin/push.c index f4358b9d23..e655eb7695 100644 --- a/builtin/push.c +++ 
b/builtin/push.c @@ -22,13 +22,13 @@ static int progress; static const char **refspec; static int refspec_nr; +static int refspec_alloc; static void add_refspec(const char *ref) { - int nr = refspec_nr + 1; - refspec = xrealloc(refspec, nr * sizeof(char *)); - refspec[nr-1] = ref; - refspec_nr = nr; + refspec_nr++; + ALLOC_GROW(refspec, refspec_nr, refspec_alloc); + refspec[refspec_nr-1] = ref; } static void set_refspecs(const char **refs, int nr) @@ -130,8 +130,8 @@ static int push_with_options(struct transport *transport, int flags) if (nonfastforward && advice_push_nonfastforward) { fprintf(stderr, "To prevent you from losing history, non-fast-forward updates were rejected\n" - "Merge the remote changes before pushing again. See the 'Note about\n" - "fast-forwards' section of 'git push --help' for details.\n"); + "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n" + "'Note about fast-forwards' section of 'git push --help' for details.\n"); } return 1; diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index d634b5a3d5..760817dbd7 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -530,7 +530,7 @@ static void check_aliased_update(struct command *cmd, struct string_list *list) static void check_aliased_updates(struct command *commands) { struct command *cmd; - struct string_list ref_list = { NULL, 0, 0, 0 }; + struct string_list ref_list = STRING_LIST_INIT_NODUP; for (cmd = commands; cmd; cmd = cmd->next) { struct string_list_item *item = diff --git a/builtin/remote.c b/builtin/remote.c index 6699bc5712..48e0a6bf26 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -134,7 +134,7 @@ static int add_branch(const char *key, const char *branchname, static int add(int argc, const char **argv) { int fetch = 0, mirror = 0, fetch_tags = TAGS_DEFAULT; - struct string_list track = { NULL, 0, 0 }; + struct string_list track = STRING_LIST_INIT_NODUP; const char *master = NULL; struct remote *remote; struct strbuf buf = 
STRBUF_INIT, buf2 = STRBUF_INIT; @@ -596,7 +596,7 @@ static int mv(int argc, const char **argv) }; struct remote *oldremote, *newremote; struct strbuf buf = STRBUF_INIT, buf2 = STRBUF_INIT, buf3 = STRBUF_INIT; - struct string_list remote_branches = { NULL, 0, 0, 0 }; + struct string_list remote_branches = STRING_LIST_INIT_NODUP; struct rename_info rename; int i; @@ -734,8 +734,8 @@ static int rm(int argc, const char **argv) struct remote *remote; struct strbuf buf = STRBUF_INIT; struct known_remotes known_remotes = { NULL, NULL }; - struct string_list branches = { NULL, 0, 0, 1 }; - struct string_list skipped = { NULL, 0, 0, 1 }; + struct string_list branches = STRING_LIST_INIT_DUP; + struct string_list skipped = STRING_LIST_INIT_DUP; struct branches_for_remote cb_data; int i, result; @@ -1044,7 +1044,7 @@ static int show(int argc, const char **argv) OPT_END() }; struct ref_states states; - struct string_list info_list = { NULL, 0, 0, 0 }; + struct string_list info_list = STRING_LIST_INIT_NODUP; struct show_info info; argc = parse_options(argc, argv, NULL, options, builtin_remote_show_usage, @@ -1483,7 +1483,7 @@ static int get_one_entry(struct remote *remote, void *priv) static int show_all(void) { - struct string_list list = { NULL, 0, 0 }; + struct string_list list = STRING_LIST_INIT_NODUP; int result; list.strdup_strings = 1; diff --git a/builtin/rerere.c b/builtin/rerere.c index 39ad60169d..642bf35587 100644 --- a/builtin/rerere.c +++ b/builtin/rerere.c @@ -1,13 +1,16 @@ #include "builtin.h" #include "cache.h" #include "dir.h" +#include "parse-options.h" #include "string-list.h" #include "rerere.h" #include "xdiff/xdiff.h" #include "xdiff-interface.h" -static const char git_rerere_usage[] = -"git rerere [clear | status | diff | gc]"; +static const char * const rerere_usage[] = { + "git rerere [clear | status | diff | gc]", + NULL, +}; /* these values are days */ static int cutoff_noresolve = 15; @@ -19,6 +22,12 @@ static time_t rerere_created_at(const char 
*name) return stat(rerere_path(name, "preimage"), &st) ? (time_t) 0 : st.st_mtime; } +static time_t rerere_last_used_at(const char *name) +{ + struct stat st; + return stat(rerere_path(name, "postimage"), &st) ? (time_t) 0 : st.st_mtime; +} + static void unlink_rr_item(const char *name) { unlink(rerere_path(name, "thisimage")); @@ -40,7 +49,7 @@ static int git_rerere_gc_config(const char *var, const char *value, void *cb) static void garbage_collect(struct string_list *rr) { - struct string_list to_remove = { NULL, 0, 0, 1 }; + struct string_list to_remove = STRING_LIST_INIT_DUP; DIR *dir; struct dirent *e; int i, cutoff; @@ -53,11 +62,16 @@ static void garbage_collect(struct string_list *rr) while ((e = readdir(dir))) { if (is_dot_or_dotdot(e->d_name)) continue; - then = rerere_created_at(e->d_name); - if (!then) - continue; - cutoff = (has_rerere_resolution(e->d_name) - ? cutoff_resolve : cutoff_noresolve); + + then = rerere_last_used_at(e->d_name); + if (then) { + cutoff = cutoff_resolve; + } else { + then = rerere_created_at(e->d_name); + if (!then) + continue; + cutoff = cutoff_noresolve; + } if (then < now - cutoff * 86400) string_list_append(&to_remove, e->d_name); } @@ -102,26 +116,27 @@ static int diff_two(const char *file1, const char *label1, int cmd_rerere(int argc, const char **argv, const char *prefix) { - struct string_list merge_rr = { NULL, 0, 0, 1 }; - int i, fd, flags = 0; - - if (2 < argc) { - if (!strcmp(argv[1], "-h")) - usage(git_rerere_usage); - if (!strcmp(argv[1], "--rerere-autoupdate")) - flags = RERERE_AUTOUPDATE; - else if (!strcmp(argv[1], "--no-rerere-autoupdate")) - flags = RERERE_NOAUTOUPDATE; - if (flags) { - argc--; - argv++; - } - } - if (argc < 2) + struct string_list merge_rr = STRING_LIST_INIT_DUP; + int i, fd, autoupdate = -1, flags = 0; + + struct option options[] = { + OPT_SET_INT(0, "rerere-autoupdate", &autoupdate, + "register clean resolutions in index", 1), + OPT_END(), + }; + + argc = parse_options(argc, argv, prefix, 
options, rerere_usage, 0); + + if (autoupdate == 1) + flags = RERERE_AUTOUPDATE; + if (autoupdate == 0) + flags = RERERE_NOAUTOUPDATE; + + if (argc < 1) return rerere(flags); - if (!strcmp(argv[1], "forget")) { - const char **pathspec = get_pathspec(prefix, argv + 2); + if (!strcmp(argv[0], "forget")) { + const char **pathspec = get_pathspec(prefix, argv + 1); return rerere_forget(pathspec); } @@ -129,26 +144,26 @@ int cmd_rerere(int argc, const char **argv, const char *prefix) if (fd < 0) return 0; - if (!strcmp(argv[1], "clear")) { + if (!strcmp(argv[0], "clear")) { for (i = 0; i < merge_rr.nr; i++) { const char *name = (const char *)merge_rr.items[i].util; if (!has_rerere_resolution(name)) unlink_rr_item(name); } unlink_or_warn(git_path("MERGE_RR")); - } else if (!strcmp(argv[1], "gc")) + } else if (!strcmp(argv[0], "gc")) garbage_collect(&merge_rr); - else if (!strcmp(argv[1], "status")) + else if (!strcmp(argv[0], "status")) for (i = 0; i < merge_rr.nr; i++) printf("%s\n", merge_rr.items[i].string); - else if (!strcmp(argv[1], "diff")) + else if (!strcmp(argv[0], "diff")) for (i = 0; i < merge_rr.nr; i++) { const char *path = merge_rr.items[i].string; const char *name = (const char *)merge_rr.items[i].util; diff_two(rerere_path(name, "preimage"), path, path, path); } else - usage(git_rerere_usage); + usage_with_options(rerere_usage, options); string_list_clear(&merge_rr, 1); return 0; diff --git a/builtin/reset.c b/builtin/reset.c index 1283068fd2..0037be4693 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -318,7 +318,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) * affecting the working tree nor HEAD. 
*/ if (i < argc) { if (reset_type == MIXED) - warning("--mixed option is deprecated with paths."); + warning("--mixed with paths is deprecated; use 'git reset -- <paths>' instead."); else if (reset_type != NONE) die("Cannot do %s reset with paths.", reset_type_names[reset_type]); diff --git a/builtin/revert.c b/builtin/revert.c index 8b9d829a73..4b47ace36b 100644 --- a/builtin/revert.c +++ b/builtin/revert.c @@ -102,9 +102,9 @@ struct commit_message { static int get_message(const char *raw_message, struct commit_message *out) { const char *encoding; - const char *p, *abbrev, *eol; + const char *abbrev, *subject; + int abbrev_len, subject_len; char *q; - int abbrev_len, oneline_len; if (!raw_message) return -1; @@ -125,27 +125,17 @@ static int get_message(const char *raw_message, struct commit_message *out) abbrev = find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV); abbrev_len = strlen(abbrev); - /* Find beginning and end of commit subject. */ - p = out->message; - while (*p && (*p != '\n' || p[1] != '\n')) - p++; - if (*p) { - p += 2; - for (eol = p + 1; *eol && *eol != '\n'; eol++) - ; /* do nothing */ - } else - eol = p; - oneline_len = eol - p; + subject_len = find_commit_subject(out->message, &subject); out->parent_label = xmalloc(strlen("parent of ") + abbrev_len + - strlen("... ") + oneline_len + 1); + strlen("... ") + subject_len + 1); q = out->parent_label; q = mempcpy(q, "parent of ", strlen("parent of ")); out->label = q; q = mempcpy(q, abbrev, abbrev_len); q = mempcpy(q, "... ", strlen("... ")); out->subject = q; - q = mempcpy(q, p, oneline_len); + q = mempcpy(q, subject, subject_len); *q = '\0'; return 0; } @@ -241,27 +231,30 @@ static void set_author_ident_env(const char *message) sha1_to_hex(commit->object.sha1)); } -static char *help_msg(void) +static void advise(const char *advice, ...) 
{ - struct strbuf helpbuf = STRBUF_INIT; - char *msg = getenv("GIT_CHERRY_PICK_HELP"); + va_list params; - if (msg) - return msg; + va_start(params, advice); + vreportf("hint: ", advice, params); + va_end(params); +} - strbuf_addstr(&helpbuf, " After resolving the conflicts,\n" - "mark the corrected paths with 'git add <paths>' or 'git rm <paths>'\n" - "and commit the result"); +static void print_advice(void) +{ + char *msg = getenv("GIT_CHERRY_PICK_HELP"); - if (action == CHERRY_PICK) { - strbuf_addf(&helpbuf, " with: \n" - "\n" - " git commit -c %s\n", - sha1_to_hex(commit->object.sha1)); + if (msg) { + fprintf(stderr, "%s\n", msg); + return; } - else - strbuf_addch(&helpbuf, '.'); - return strbuf_detach(&helpbuf, NULL); + + advise("after resolving the conflicts, mark the corrected paths"); + advise("with 'git add <paths>' or 'git rm <paths>'"); + + if (action == CHERRY_PICK) + advise("and commit the result with 'git commit -c %s'", + find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV)); } static void write_message(struct strbuf *msgbuf, const char *filename) @@ -311,10 +304,9 @@ static int fast_forward_to(const unsigned char *to, const unsigned char *from) return write_ref_sha1(ref_lock, to, "cherry-pick"); } -static void do_recursive_merge(struct commit *base, struct commit *next, - const char *base_label, const char *next_label, - unsigned char *head, struct strbuf *msgbuf, - char *defmsg) +static int do_recursive_merge(struct commit *base, struct commit *next, + const char *base_label, const char *next_label, + unsigned char *head, struct strbuf *msgbuf) { struct merge_options o; struct tree *result, *next_tree, *base_tree, *head_tree; @@ -324,6 +316,13 @@ static void do_recursive_merge(struct commit *base, struct commit *next, index_fd = hold_locked_index(&index_lock, 1); read_cache(); + + /* + * NEEDSWORK: cherry-picking between branches with + * different end-of-line normalization is a pain; + * plumb in an option to set o.renormalize? 
+ * (or better: arbitrary -X options) + */ init_merge_options(&o); o.ancestor = base ? base_label : "(empty tree)"; o.branch1 = "HEAD"; @@ -357,14 +356,35 @@ static void do_recursive_merge(struct commit *base, struct commit *next, i++; } } - write_message(msgbuf, defmsg); - fprintf(stderr, "Automatic %s failed.%s\n", - me, help_msg()); - rerere(allow_rerere_auto); - exit(1); } - write_message(msgbuf, defmsg); - fprintf(stderr, "Finished one %s.\n", me); + + return !clean; +} + +/* + * If we are cherry-pick, and if the merge did not result in + * hand-editing, we will hit this commit and inherit the original + * author date and name. + * If we are revert, or if our cherry-pick results in a hand merge, + * we had better say that the current user is responsible for that. + */ +static int run_git_commit(const char *defmsg) +{ + /* 6 is max possible length of our args array including NULL */ + const char *args[6]; + int i = 0; + + args[i++] = "commit"; + args[i++] = "-n"; + if (signoff) + args[i++] = "-s"; + if (!edit) { + args[i++] = "-F"; + args[i++] = defmsg; + } + args[i] = NULL; + + return run_command_v_opt(args, RUN_GIT_CMD); } static int do_pick_commit(void) @@ -375,6 +395,7 @@ static int do_pick_commit(void) struct commit_message msg = { NULL, NULL, NULL, NULL, NULL }; char *defmsg = NULL; struct strbuf msgbuf = STRBUF_INIT; + int res; if (no_commit) { /* @@ -470,63 +491,40 @@ static int do_pick_commit(void) } } - if (!strategy || !strcmp(strategy, "recursive") || action == REVERT) - do_recursive_merge(base, next, base_label, next_label, - head, &msgbuf, defmsg); - else { - int res; + if (!strategy || !strcmp(strategy, "recursive") || action == REVERT) { + res = do_recursive_merge(base, next, base_label, next_label, + head, &msgbuf); + write_message(&msgbuf, defmsg); + } else { struct commit_list *common = NULL; struct commit_list *remotes = NULL; + write_message(&msgbuf, defmsg); + commit_list_insert(base, &common); commit_list_insert(next, &remotes); res = 
try_merge_command(strategy, common, sha1_to_hex(head), remotes); free_commit_list(common); free_commit_list(remotes); - if (res) { - fprintf(stderr, "Automatic %s with strategy %s failed.%s\n", - me, strategy, help_msg()); - rerere(allow_rerere_auto); - exit(1); - } } - free_message(&msg); - - /* - * - * If we are cherry-pick, and if the merge did not result in - * hand-editing, we will hit this commit and inherit the original - * author date and name. - * If we are revert, or if our cherry-pick results in a hand merge, - * we had better say that the current user is responsible for that. - */ - - if (!no_commit) { - /* 6 is max possible length of our args array including NULL */ - const char *args[6]; - int res; - int i = 0; - - args[i++] = "commit"; - args[i++] = "-n"; - if (signoff) - args[i++] = "-s"; - if (!edit) { - args[i++] = "-F"; - args[i++] = defmsg; - } - args[i] = NULL; - res = run_command_v_opt(args, RUN_GIT_CMD); - free(defmsg); - - return res; + if (res) { + error("could not %s %s... %s", + action == REVERT ? 
"revert" : "apply", + find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV), + msg.subject); + print_advice(); + rerere(allow_rerere_auto); + } else { + if (!no_commit) + res = run_git_commit(defmsg); } + free_message(&msg); free(defmsg); - return 0; + return res; } static void prepare_revs(struct rev_info *revs) diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 0a9681ba7e..2135b0dde1 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -249,7 +249,7 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) { static struct shortlog log; static struct rev_info rev; - int nongit; + int nongit = !startup_info->have_repository; static const struct option options[] = { OPT_BOOLEAN('n', "numbered", &log.sort_by_number, @@ -265,7 +265,6 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) struct parse_opt_ctx_t ctx; - prefix = setup_git_directory_gently(&nongit); git_config(git_default_config, NULL); shortlog_init(&log); init_revisions(&rev, prefix); diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 0b2a9ad1a9..be9b512eeb 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -120,7 +120,7 @@ static int add_existing(const char *refname, const unsigned char *sha1, int flag */ static int exclude_existing(const char *match) { - static struct string_list existing_refs = { NULL, 0, 0, 0 }; + static struct string_list existing_refs = STRING_LIST_INIT_NODUP; char buf[1024]; int matchlen = match ? 
strlen(match) : 0; diff --git a/builtin/var.c b/builtin/var.c index 70fdb4dec7..0744bb8318 100644 --- a/builtin/var.c +++ b/builtin/var.c @@ -74,14 +74,9 @@ static int show_config(const char *var, const char *value, void *cb) int cmd_var(int argc, const char **argv, const char *prefix) { - const char *val; - int nongit; - if (argc != 2) { + const char *val = NULL; + if (argc != 2) usage(var_usage); - } - - setup_git_directory_gently(&nongit); - val = NULL; if (strcmp(argv[1], "-l") == 0) { git_config(show_config, NULL); diff --git a/cache-tree.c b/cache-tree.c index d91743775d..c60cf9140d 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -328,9 +328,11 @@ static int update_one(struct cache_tree *it, mode = ce->ce_mode; entlen = pathlen - baselen; } - if (mode != S_IFGITLINK && !missing_ok && !has_sha1_file(sha1)) + if (mode != S_IFGITLINK && !missing_ok && !has_sha1_file(sha1)) { + strbuf_release(&buffer); return error("invalid object %06o %s for '%.*s'", mode, sha1_to_hex(sha1), entlen+baselen, path); + } if (ce->ce_flags & CE_REMOVE) continue; /* entry being removed */ @@ -179,8 +179,7 @@ struct cache_entry { #define CE_UNHASHED (0x200000) #define CE_CONFLICTED (0x800000) -/* Only remove in work directory, not index */ -#define CE_WT_REMOVE (0x400000) +#define CE_WT_REMOVE (0x400000) /* remove in work directory */ #define CE_UNPACKED (0x1000000) @@ -449,7 +448,7 @@ extern int init_db(const char *template_dir, unsigned int flags); alloc = alloc_nr(alloc); \ x = xrealloc((x), alloc * sizeof(*(x))); \ } \ - } while(0) + } while (0) /* Initialize and use the cache information */ extern int read_index(struct index_state *); @@ -641,6 +640,9 @@ extern char *git_pathdup(const char *fmt, ...) /* Return a statically allocated filename matching the sha1 signature */ extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); extern char *git_path(const char *fmt, ...) 
__attribute__((format (printf, 1, 2))); +extern char *git_path_submodule(const char *path, const char *fmt, ...) + __attribute__((format (printf, 2, 3))); + extern char *sha1_file_name(const unsigned char *sha1); extern char *sha1_pack_name(const unsigned char *sha1); extern char *sha1_pack_index_name(const unsigned char *sha1); @@ -811,6 +813,7 @@ const char *show_date_relative(unsigned long time, int tz, char *timebuf, size_t timebuf_size); int parse_date(const char *date, char *buf, int bufsize); +int parse_date_basic(const char *date, unsigned long *timestamp, int *offset); void datestamp(char *buf, int bufsize); #define approxidate(s) approxidate_careful((s), NULL) unsigned long approxidate_careful(const char *, int *); @@ -1054,6 +1057,7 @@ extern void trace_argv_printf(const char **argv, const char *format, ...); extern int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe); extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst); +extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst); /* add */ /* @@ -1096,6 +1100,14 @@ void overlay_tree_on_cache(const char *tree_name, const char *prefix); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); +/* Takes a negative value returned by split_cmdline */ +const char *split_cmdline_strerror(int cmdline_errno); + +/* git.c */ +struct startup_info { + int have_repository; +}; +extern struct startup_info *startup_info; /* builtin/merge.c */ int checkout_fast_forward(const unsigned char *from, const unsigned char *to); @@ -315,6 +315,25 @@ int parse_commit(struct commit *item) return ret; } +int find_commit_subject(const char *commit_buffer, const char **subject) +{ + const char *eol; + const char *p = commit_buffer; + + while (*p && (*p != '\n' || p[1] != '\n')) + p++; + if (*p) { + p += 2; + for (eol = p; *eol && *eol != '\n'; eol++) + 
; /* do nothing */ + } else + eol = p; + + *subject = p; + + return eol - p; +} + struct commit_list *commit_list_insert(struct commit *item, struct commit_list **list_p) { struct commit_list *new_list = xmalloc(sizeof(struct commit_list)); @@ -41,6 +41,9 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size); int parse_commit(struct commit *item); +/* Find beginning and length of commit subject. */ +int find_commit_subject(const char *commit_buffer, const char **subject); + struct commit_list * commit_list_insert(struct commit *item, struct commit_list **list_p); unsigned commit_list_count(const struct commit_list *l); struct commit_list * insert_by_date(struct commit *item, struct commit_list **list); diff --git a/compat/mingw.c b/compat/mingw.c index 9a8e336582..f2d9e1fd97 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -304,8 +304,13 @@ int mingw_utime (const char *file_name, const struct utimbuf *times) goto revert_attrs; } - time_t_to_filetime(times->modtime, &mft); - time_t_to_filetime(times->actime, &aft); + if (times) { + time_t_to_filetime(times->modtime, &mft); + time_t_to_filetime(times->actime, &aft); + } else { + GetSystemTimeAsFileTime(&mft); + aft = mft; + } if (!SetFileTime((HANDLE)_get_osfhandle(fh), NULL, &aft, &mft)) { errno = EINVAL; rc = -1; @@ -641,7 +646,7 @@ static char *lookup_prog(const char *dir, const char *cmd, int isexe, int exe_on } /* - * Determines the absolute path of cmd using the the split path in path. + * Determines the absolute path of cmd using the split path in path. * If cmd contains a slash or backslash, no lookup is performed. 
*/ static char *path_lookup(const char *cmd, char **path, int exe_only) diff --git a/compat/nedmalloc/malloc.c.h b/compat/nedmalloc/malloc.c.h index 74c42e3162..87260d2642 100644 --- a/compat/nedmalloc/malloc.c.h +++ b/compat/nedmalloc/malloc.c.h @@ -2069,7 +2069,7 @@ static void init_malloc_global_mutex() { Each freshly allocated chunk must have both cinuse and pinuse set. That is, each allocated chunk borders either a previously allocated and still in-use chunk, or the base of its memory arena. This is - ensured by making all allocations from the the `lowest' part of any + ensured by making all allocations from the `lowest' part of any found chunk. Further, no free chunk physically borders another one, so each free chunk is known to be preceded and followed by either inuse chunks or the ends of memory. diff --git a/compat/regex/regcomp.c b/compat/regex/regcomp.c new file mode 100644 index 0000000000..8c96ed942c --- /dev/null +++ b/compat/regex/regcomp.c @@ -0,0 +1,3884 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. 
*/ + +static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, + size_t length, reg_syntax_t syntax); +static void re_compile_fastmap_iter (regex_t *bufp, + const re_dfastate_t *init_state, + char *fastmap); +static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); +#ifdef RE_ENABLE_I18N +static void free_charset (re_charset_t *cset); +#endif /* RE_ENABLE_I18N */ +static void free_workarea_compile (regex_t *preg); +static reg_errcode_t create_initial_state (re_dfa_t *dfa); +#ifdef RE_ENABLE_I18N +static void optimize_utf8 (re_dfa_t *dfa); +#endif +static reg_errcode_t analyze (regex_t *preg); +static reg_errcode_t preorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t postorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); +static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); +static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, + bin_tree_t *node); +static reg_errcode_t calc_first (void *extra, bin_tree_t *node); +static reg_errcode_t calc_next (void *extra, bin_tree_t *node); +static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); +static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); +static int search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint); +static reg_errcode_t calc_eclosure (re_dfa_t *dfa); +static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, + int node, int root); +static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); +static int fetch_number (re_string_t *input, re_token_t *token, + reg_syntax_t syntax); +static int peek_token (re_token_t *token, re_string_t *input, + reg_syntax_t syntax) internal_function; +static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t 
*parse_reg_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, + re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, + reg_errcode_t *err); +static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token, int token_len, + re_dfa_t *dfa, + reg_syntax_t syntax, + int accept_hyphen); +static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token); +#ifdef RE_ENABLE_I18N +static reg_errcode_t build_equiv_class (bitset_t sbcset, + re_charset_t *mbcset, + int *equiv_class_alloc, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + re_charset_t *mbcset, + int *char_class_alloc, + const char *class_name, + reg_syntax_t syntax); +#else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_equiv_class (bitset_t sbcset, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + const char *class_name, + reg_syntax_t syntax); +#endif /* not RE_ENABLE_I18N */ +static bin_tree_t *build_charclass_op (re_dfa_t *dfa, + RE_TRANSLATE_TYPE trans, + const char *class_name, + const char *extra, + int non_match, reg_errcode_t *err); +static bin_tree_t *create_tree 
(re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + re_token_type_t type); +static bin_tree_t *create_token_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + const re_token_t *token); +static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); +static void free_token (re_token_t *node); +static reg_errcode_t free_tree (void *extra, bin_tree_t *node); +static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? */ + +const char __re_error_msgid[] attribute_hidden = + { +#define REG_NOERROR_IDX 0 + gettext_noop ("Success") /* REG_NOERROR */ + "\0" +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") + gettext_noop ("No match") /* REG_NOMATCH */ + "\0" +#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") + gettext_noop ("Invalid regular expression") /* REG_BADPAT */ + "\0" +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") + gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ + "\0" +#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") + gettext_noop ("Invalid character class name") /* REG_ECTYPE */ + "\0" +#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") + gettext_noop ("Trailing backslash") /* REG_EESCAPE */ + "\0" +#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") + gettext_noop ("Invalid back reference") /* REG_ESUBREG */ + "\0" +#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") + gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + "\0" +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") + gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ + "\0" +#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") + 
gettext_noop ("Unmatched \\{") /* REG_EBRACE */ + "\0" +#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") + gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ + "\0" +#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") + gettext_noop ("Invalid range end") /* REG_ERANGE */ + "\0" +#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") + gettext_noop ("Memory exhausted") /* REG_ESPACE */ + "\0" +#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") + gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ + "\0" +#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") + gettext_noop ("Premature end of regular expression") /* REG_EEND */ + "\0" +#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") + gettext_noop ("Regular expression too big") /* REG_ESIZE */ + "\0" +#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") + gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ + }; + +const size_t __re_error_msgid_idx[] attribute_hidden = + { + REG_NOERROR_IDX, + REG_NOMATCH_IDX, + REG_BADPAT_IDX, + REG_ECOLLATE_IDX, + REG_ECTYPE_IDX, + REG_EESCAPE_IDX, + REG_ESUBREG_IDX, + REG_EBRACK_IDX, + REG_EPAREN_IDX, + REG_EBRACE_IDX, + REG_BADBR_IDX, + REG_ERANGE_IDX, + REG_ESPACE_IDX, + REG_BADRPT_IDX, + REG_EEND_IDX, + REG_ESIZE_IDX, + REG_ERPAREN_IDX + }; + +/* Entry points for GNU code. */ + + +#ifdef ZOS_USS + +/* For ZOS USS we must define btowc */ + +wchar_t +btowc (int c) +{ + wchar_t wtmp[2]; + char tmp[2]; + + tmp[0] = c; + tmp[1] = 0; + + mbtowc (wtmp, tmp, 1); + return wtmp[0]; +} +#endif + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length LENGTH) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. 
*/ + +const char * +re_compile_pattern (const char *pattern, + size_t length, + struct re_pattern_buffer *bufp) +{ + reg_errcode_t ret; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub, unless RE_NO_SUB is set. */ + bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = re_compile_internal (bufp, pattern, length, re_syntax_options); + + if (!ret) + return NULL; + return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. 
*/ + +reg_syntax_t +re_set_syntax (reg_syntax_t syntax) +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} +#ifdef _LIBC +weak_alias (__re_set_syntax, re_set_syntax) +#endif + +int +re_compile_fastmap (struct re_pattern_buffer *bufp) +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + char *fastmap = bufp->fastmap; + + memset (fastmap, '\0', sizeof (char) * SBC_MAX); + re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); + if (dfa->init_state != dfa->init_state_word) + re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); + if (dfa->init_state != dfa->init_state_nl) + re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); + if (dfa->init_state != dfa->init_state_begbuf) + re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); + bufp->fastmap_accurate = 1; + return 0; +} +#ifdef _LIBC +weak_alias (__re_compile_fastmap, re_compile_fastmap) +#endif + +static inline void +__attribute ((always_inline)) +re_set_fastmap (char *fastmap, int icase, int ch) +{ + fastmap[ch] = 1; + if (icase) + fastmap[tolower (ch)] = 1; +} + +/* Helper function for re_compile_fastmap. + Compile fastmap for the initial_state INIT_STATE. 
*/ + +static void +re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, + char *fastmap) +{ + volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + int node_cnt; + int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); + for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) + { + int node = init_state->nodes.elems[node_cnt]; + re_token_type_t type = dfa->nodes[node].type; + + if (type == CHARACTER) + { + re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); +#ifdef RE_ENABLE_I18N + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p; + wchar_t wc; + mbstate_t state; + + p = buf; + *p++ = dfa->nodes[node].opr.c; + while (++node < dfa->nodes_len + && dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].mb_partial) + *p++ = dfa->nodes[node].opr.c; + memset (&state, '\0', sizeof (state)); + if (__mbrtowc (&wc, (const char *) buf, p - buf, + &state) == p - buf + && (__wcrtomb ((char *) buf, towlower (wc), &state) + != (size_t) -1)) + re_set_fastmap (fastmap, 0, buf[0]); + re_free (buf); + } +#endif + } + else if (type == SIMPLE_BRACKET) + { + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } + } +#ifdef RE_ENABLE_I18N + else if (type == COMPLEX_BRACKET) + { + re_charset_t *cset = dfa->nodes[node].opr.mbcset; + int i; + +# ifdef _LIBC + /* See if we have to try all bytes which start multiple collation + elements. + e.g. In da_DK, we want to catch 'a' since "aa" is a valid + collation element, and don't catch 'b' since 'b' is + the only collation element which starts from 'b' (and + it is caught by SIMPLE_BRACKET). 
*/ + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 + && (cset->ncoll_syms || cset->nranges)) + { + const int32_t *table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); + } +# endif /* _LIBC */ + + /* See if we have to start the match at all multibyte characters, + i.e. where we would not find an invalid sequence. This only + applies to multibyte character sets; for single byte character + sets, the SIMPLE_BRACKET again suffices. */ + if (dfa->mb_cur_max > 1 + && (cset->nchar_classes || cset->non_match || cset->nranges +# ifdef _LIBC + || cset->nequiv_classes +# endif /* _LIBC */ + )) + { + unsigned char c = 0; + do + { + mbstate_t mbs; + memset (&mbs, 0, sizeof (mbs)); + if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) + re_set_fastmap (fastmap, false, (int) c); + } + while (++c != 0); + } + + else + { + /* ... Else catch all bytes which can start the mbchars. */ + for (i = 0; i < cset->nmbchars; ++i) + { + char buf[256]; + mbstate_t state; + memset (&state, '\0', sizeof (state)); + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) + != (size_t) -1) + re_set_fastmap (fastmap, false, *(unsigned char *) buf); + } + } + } + } +#endif /* RE_ENABLE_I18N */ + else if (type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == OP_UTF8_PERIOD +#endif /* RE_ENABLE_I18N */ + || type == END_OF_RE) + { + memset (fastmap, '\1', sizeof (char) * SBC_MAX); + if (type == END_OF_RE) + bufp->can_be_null = 1; + return; + } + } +} + +/* Entry point for POSIX code. */ +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. 
Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (regex_t *__restrict preg, + const char *__restrict pattern, + int cflags) +{ + reg_errcode_t ret; + reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + preg->buffer = NULL; + preg->allocated = 0; + preg->used = 0; + + /* Try to allocate space for the fastmap. */ + preg->fastmap = re_malloc (char, SBC_MAX); + if (BE (preg->fastmap == NULL, 0)) + return REG_ESPACE; + + syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. 
*/ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + preg->no_sub = !!(cflags & REG_NOSUB); + preg->translate = NULL; + + ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) + ret = REG_EPAREN; + + /* We have already checked preg->fastmap != NULL. */ + if (BE (ret == REG_NOERROR, 1)) + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. This function never fails in this implementation. */ + (void) re_compile_fastmap (preg); + else + { + /* Some error occurred while compiling the expression. */ + re_free (preg->fastmap); + preg->fastmap = NULL; + } + + return (int) ret; +} +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror(int errcode, const regex_t *__restrict preg, + char *__restrict errbuf, size_t errbuf_size) +{ + const char *msg; + size_t msg_size; + + if (BE (errcode < 0 + || errcode >= (int) (sizeof (__re_error_msgid_idx) + / sizeof (__re_error_msgid_idx[0])), 0)) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. 
*/ + + if (BE (errbuf_size != 0, 1)) + { + if (BE (msg_size > errbuf_size, 0)) + { + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; + } + else + memcpy (errbuf, msg, msg_size); + } + + return msg_size; +} +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif + + +#ifdef RE_ENABLE_I18N +/* This static array is used for the map to single-byte characters when + UTF-8 is used. Otherwise we would allocate memory just to initialize + it the same all the time. UTF-8 is the preferred encoding so this is + a worthwhile optimization. */ +#if __GNUC__ >= 3 +static const bitset_t utf8_sb_map = { + /* Set the first 128 bits. */ + [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX +}; +#else /* ! (__GNUC__ >= 3) */ +static bitset_t utf8_sb_map; +#endif /* __GNUC__ >= 3 */ +#endif /* RE_ENABLE_I18N */ + + +static void +free_dfa_content (re_dfa_t *dfa) +{ + int i, j; + + if (dfa->nodes) + for (i = 0; i < dfa->nodes_len; ++i) + free_token (dfa->nodes + i); + re_free (dfa->nexts); + for (i = 0; i < dfa->nodes_len; ++i) + { + if (dfa->eclosures != NULL) + re_node_set_free (dfa->eclosures + i); + if (dfa->inveclosures != NULL) + re_node_set_free (dfa->inveclosures + i); + if (dfa->edests != NULL) + re_node_set_free (dfa->edests + i); + } + re_free (dfa->edests); + re_free (dfa->eclosures); + re_free (dfa->inveclosures); + re_free (dfa->nodes); + + if (dfa->state_table) + for (i = 0; i <= dfa->state_hash_mask; ++i) + { + struct re_state_table_entry *entry = dfa->state_table + i; + for (j = 0; j < entry->num; ++j) + { + re_dfastate_t *state = entry->array[j]; + free_state (state); + } + re_free (entry->array); + } + re_free (dfa->state_table); +#ifdef RE_ENABLE_I18N + if (dfa->sb_char != utf8_sb_map) + re_free (dfa->sb_char); +#endif + re_free (dfa->subexp_map); +#ifdef DEBUG + re_free (dfa->re_str); +#endif + + re_free (dfa); +} + + +/* Free dynamically allocated space used by PREG. 
*/ + +void +regfree (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + if (BE (dfa != NULL, 1)) + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + + re_free (preg->fastmap); + preg->fastmap = NULL; + + re_free (preg->translate); + preg->translate = NULL; +} +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +# ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec above without link errors. */ +weak_function +# endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + char *fastmap; + + if (!s) + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (re_comp_buf.buffer) + { + fastmap = re_comp_buf.fastmap; + re_comp_buf.fastmap = NULL; + __regfree (&re_comp_buf); + memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); + re_comp_buf.fastmap = fastmap; + } + + if (re_comp_buf.fastmap == NULL) + { + re_comp_buf.fastmap = (char *) malloc (SBC_MAX); + if (re_comp_buf.fastmap == NULL) + return (char *) gettext (__re_error_msgid + + __re_error_msgid_idx[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. 
*/ + return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} + +#ifdef _LIBC +libc_freeres_fn (free_mem) +{ + __regfree (&re_comp_buf); +} +#endif + +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. + Compile the regular expression PATTERN, whose length is LENGTH. + SYNTAX indicate regular expression's syntax. */ + +static reg_errcode_t +re_compile_internal (regex_t *preg, const char * pattern, size_t length, + reg_syntax_t syntax) +{ + reg_errcode_t err = REG_NOERROR; + re_dfa_t *dfa; + re_string_t regexp; + + /* Initialize the pattern buffer. */ + preg->fastmap_accurate = 0; + preg->syntax = syntax; + preg->not_bol = preg->not_eol = 0; + preg->used = 0; + preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; + + /* Initialize the dfa. */ + dfa = (re_dfa_t *) preg->buffer; + if (BE (preg->allocated < sizeof (re_dfa_t), 0)) + { + /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. If ->buffer is NULL this + is a simple allocation. */ + dfa = re_realloc (preg->buffer, re_dfa_t, 1); + if (dfa == NULL) + return REG_ESPACE; + preg->allocated = sizeof (re_dfa_t); + preg->buffer = (unsigned char *) dfa; + } + preg->used = sizeof (re_dfa_t); + + err = init_dfa (dfa, length); + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } +#ifdef DEBUG + /* Note: length+1 will not overflow since it is checked in init_dfa. 
*/ + dfa->re_str = re_malloc (char, length + 1); + strncpy (dfa->re_str, pattern, length + 1); +#endif + + __libc_lock_init (dfa->lock); + + err = re_string_construct (®exp, pattern, length, preg->translate, + syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + { + re_compile_internal_free_return: + free_workarea_compile (preg); + re_string_destruct (®exp); + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } + + /* Parse the regular expression, and build a structure tree. */ + preg->re_nsub = 0; + dfa->str_tree = parse (®exp, preg, syntax, &err); + if (BE (dfa->str_tree == NULL, 0)) + goto re_compile_internal_free_return; + + /* Analyze the tree and create the nfa. */ + err = analyze (preg); + if (BE (err != REG_NOERROR, 0)) + goto re_compile_internal_free_return; + +#ifdef RE_ENABLE_I18N + /* If possible, do searching in single byte encoding to speed things up. */ + if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) + optimize_utf8 (dfa); +#endif + + /* Then create the initial state of the dfa. */ + err = create_initial_state (dfa); + + /* Release work areas. */ + free_workarea_compile (preg); + re_string_destruct (®exp); + + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + } + + return err; +} + +/* Initialize DFA. We use the length of the regular expression PAT_LEN + as the initial length of some arrays. */ + +static reg_errcode_t +init_dfa (re_dfa_t *dfa, size_t pat_len) +{ + unsigned int table_size; +#ifndef _LIBC + char *codeset_name; +#endif + + memset (dfa, '\0', sizeof (re_dfa_t)); + + /* Force allocation of str_tree_storage the first time. */ + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + + /* Avoid overflows. 
*/ + if (pat_len == SIZE_MAX) + return REG_ESPACE; + + dfa->nodes_alloc = pat_len + 1; + dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); + + /* table_size = 2 ^ ceil(log pat_len) */ + for (table_size = 1; ; table_size <<= 1) + if (table_size > pat_len) + break; + + dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); + dfa->state_hash_mask = table_size - 1; + + dfa->mb_cur_max = MB_CUR_MAX; +#ifdef _LIBC + if (dfa->mb_cur_max == 6 + && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) + dfa->is_utf8 = 1; + dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) + != 0); +#else +# ifdef HAVE_LANGINFO_CODESET + codeset_name = nl_langinfo (CODESET); +# else + codeset_name = getenv ("LC_ALL"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LC_CTYPE"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LANG"); + if (codeset_name == NULL) + codeset_name = ""; + else if (strchr (codeset_name, '.') != NULL) + codeset_name = strchr (codeset_name, '.') + 1; +# endif + + /* strcasecmp isn't a standard interface. brute force check */ +#if 0 + if (strcasecmp (codeset_name, "UTF-8") == 0 + || strcasecmp (codeset_name, "UTF8") == 0) + dfa->is_utf8 = 1; +#else + if ( (codeset_name[0] == 'U' || codeset_name[0] == 'u') + && (codeset_name[1] == 'T' || codeset_name[1] == 't') + && (codeset_name[2] == 'F' || codeset_name[2] == 'f') + && (codeset_name[3] == '-' + ? codeset_name[4] == '8' && codeset_name[5] == '\0' + : codeset_name[3] == '8' && codeset_name[4] == '\0')) + dfa->is_utf8 = 1; +#endif + + /* We check exhaustively in the loop below if this charset is a + superset of ASCII. */ + dfa->map_notascii = 0; +#endif + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + if (dfa->is_utf8) + { +#if !defined(__GNUC__) || __GNUC__ < 3 + static short utf8_sb_map_inited = 0; + + if (! 
utf8_sb_map_inited) + { + int i; + + utf8_sb_map_inited = 0; + for (i = 0; i <= 0x80 / BITSET_WORD_BITS - 1; i++) + utf8_sb_map[i] = BITSET_WORD_MAX; + } +#endif + dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; + } + else + { + int i, j, ch; + + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); + if (BE (dfa->sb_char == NULL, 0)) + return REG_ESPACE; + + /* Set the bits corresponding to single byte chars. */ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + { + wint_t wch = __btowc (ch); + if (wch != WEOF) + dfa->sb_char[i] |= (bitset_word_t) 1 << j; +# ifndef _LIBC + if (isascii (ch) && wch != ch) + dfa->map_notascii = 1; +# endif + } + } + } +#endif + + if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +/* Initialize WORD_CHAR table, which indicate which character is + "word". In this case "word" means that it is the word construction + character used by some operators like "\<", "\>", etc. */ + +static void +internal_function +init_word_char (re_dfa_t *dfa) +{ + int i, j, ch; + dfa->word_ops_used = 1; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (isalnum (ch) || ch == '_') + dfa->word_char[i] |= (bitset_word_t) 1 << j; +} + +/* Free the work area which are only used while compiling. */ + +static void +free_workarea_compile (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_storage_t *storage, *next; + for (storage = dfa->str_tree_storage; storage; storage = next) + { + next = storage->next; + re_free (storage); + } + dfa->str_tree_storage = NULL; + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + dfa->str_tree = NULL; + re_free (dfa->org_indices); + dfa->org_indices = NULL; +} + +/* Create initial states for all contexts. 
*/ + +static reg_errcode_t +create_initial_state (re_dfa_t *dfa) +{ + int first, i; + reg_errcode_t err; + re_node_set init_nodes; + + /* Initial states have the epsilon closure of the node which is + the first node of the regular expression. */ + first = dfa->str_tree->first->node_idx; + dfa->init_node = first; + err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* The back-references which are in initial states can epsilon transit, + since in this case all of the subexpressions can be null. + Then we add epsilon closures of the nodes which are the next nodes of + the back-references. */ + if (dfa->nbackref > 0) + for (i = 0; i < init_nodes.nelem; ++i) + { + int node_idx = init_nodes.elems[i]; + re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + if (type != OP_BACK_REF) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + + if (type == OP_BACK_REF) + { + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) + { + reg_errcode_t err = re_node_set_merge (&init_nodes, + dfa->eclosures + + dest_idx); + if (err != REG_NOERROR) + return err; + i = 0; + } + } + } + + /* It must be the first time to invoke acquire_state. */ + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. 
*/ + if (BE (dfa->init_state == NULL, 0)) + return err; + if (dfa->init_state->has_constraint) + { + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_WORD); + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_NEWLINE); + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, + CONTEXT_NEWLINE + | CONTEXT_BEGBUF); + if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return err; + } + else + dfa->init_state_word = dfa->init_state_nl + = dfa->init_state_begbuf = dfa->init_state; + + re_node_set_free (&init_nodes); + return REG_NOERROR; +} + +#ifdef RE_ENABLE_I18N +/* If it is possible to do searching in single byte encoding instead of UTF-8 + to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change + DFA nodes where needed. */ + +static void +optimize_utf8 (re_dfa_t *dfa) +{ + int node, i, mb_chars = 0, has_period = 0; + + for (node = 0; node < dfa->nodes_len; ++node) + switch (dfa->nodes[node].type) + { + case CHARACTER: + if (dfa->nodes[node].opr.c >= 0x80) + mb_chars = 1; + break; + case ANCHOR: + switch (dfa->nodes[node].opr.ctx_type) + { + case LINE_FIRST: + case LINE_LAST: + case BUF_FIRST: + case BUF_LAST: + break; + default: + /* Word anchors etc. cannot be handled. It's okay to test + opr.ctx_type since constraints (for all DFA nodes) are + created by ORing one or more opr.ctx_type values. */ + return; + } + break; + case OP_PERIOD: + has_period = 1; + break; + case OP_BACK_REF: + case OP_ALT: + case END_OF_RE: + case OP_DUP_ASTERISK: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + break; + case COMPLEX_BRACKET: + return; + case SIMPLE_BRACKET: + /* Just double check. The non-ASCII range starts at 0x80. 
*/ + assert (0x80 % BITSET_WORD_BITS == 0); + for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) + if (dfa->nodes[node].opr.sbcset[i]) + return; + break; + default: + abort (); + } + + if (mb_chars || has_period) + for (node = 0; node < dfa->nodes_len; ++node) + { + if (dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].opr.c >= 0x80) + dfa->nodes[node].mb_partial = 0; + else if (dfa->nodes[node].type == OP_PERIOD) + dfa->nodes[node].type = OP_UTF8_PERIOD; + } + + /* The search can be in single byte locale. */ + dfa->mb_cur_max = 1; + dfa->is_utf8 = 0; + dfa->has_mb_node = dfa->nbackref > 0 || has_period; +} +#endif + +/* Analyze the structure tree, and calculate "first", "next", "edest", + "eclosure", and "inveclosure". */ + +static reg_errcode_t +analyze (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + reg_errcode_t ret; + + /* Allocate arrays. */ + dfa->nexts = re_malloc (int, dfa->nodes_alloc); + dfa->org_indices = re_malloc (int, dfa->nodes_alloc); + dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); + dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); + if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL + || dfa->eclosures == NULL, 0)) + return REG_ESPACE; + + dfa->subexp_map = re_malloc (int, preg->re_nsub); + if (dfa->subexp_map != NULL) + { + int i; + for (i = 0; i < preg->re_nsub; i++) + dfa->subexp_map[i] = i; + preorder (dfa->str_tree, optimize_subexps, dfa); + for (i = 0; i < preg->re_nsub; i++) + if (dfa->subexp_map[i] != i) + break; + if (i == preg->re_nsub) + { + free (dfa->subexp_map); + dfa->subexp_map = NULL; + } + } + + ret = postorder (dfa->str_tree, lower_subexps, preg); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = postorder (dfa->str_tree, calc_first, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + preorder (dfa->str_tree, calc_next, dfa); + ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = 
calc_eclosure (dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + /* We only need this during the prune_impossible_nodes pass in regexec.c; + skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ + if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) + || dfa->nbackref) + { + dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); + if (BE (dfa->inveclosures == NULL, 0)) + return REG_ESPACE; + ret = calc_inveclosure (dfa); + } + + return ret; +} + +/* Our parse trees are very unbalanced, so we cannot use a stack to + implement parse tree visits. Instead, we use parent pointers and + some hairy code in these two functions. */ +static reg_errcode_t +postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node, *prev; + + for (node = root; ; ) + { + /* Descend down the tree, preferably to the left (or to the right + if that's the only child). */ + while (node->left || node->right) + if (node->left) + node = node->left; + else + node = node->right; + + do + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + if (node->parent == NULL) + return REG_NOERROR; + prev = node; + node = node->parent; + } + /* Go up while we have a node that is reached from the right. */ + while (node->right == prev || node->right == NULL); + node = node->right; + } +} + +static reg_errcode_t +preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node; + + for (node = root; ; ) + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Go to the left node, or up and to the right. 
*/ + if (node->left) + node = node->left; + else + { + bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + if (!node) + return REG_NOERROR; + } + node = node->right; + } + } +} + +/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell + re_search_internal to map the inner one's opr.idx to this one's. Adjust + backreferences as well. Requires a preorder visit. */ +static reg_errcode_t +optimize_subexps (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + + if (node->token.type == OP_BACK_REF && dfa->subexp_map) + { + int idx = node->token.opr.idx; + node->token.opr.idx = dfa->subexp_map[idx]; + dfa->used_bkref_map |= 1 << node->token.opr.idx; + } + + else if (node->token.type == SUBEXP + && node->left && node->left->token.type == SUBEXP) + { + int other_idx = node->left->token.opr.idx; + + node->left = node->left->left; + if (node->left) + node->left->parent = node; + + dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); + } + + return REG_NOERROR; +} + +/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation + of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. 
*/ +static reg_errcode_t +lower_subexps (void *extra, bin_tree_t *node) +{ + regex_t *preg = (regex_t *) extra; + reg_errcode_t err = REG_NOERROR; + + if (node->left && node->left->token.type == SUBEXP) + { + node->left = lower_subexp (&err, preg, node->left); + if (node->left) + node->left->parent = node; + } + if (node->right && node->right->token.type == SUBEXP) + { + node->right = lower_subexp (&err, preg, node->right); + if (node->right) + node->right->parent = node; + } + + return err; +} + +static bin_tree_t * +lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *body = node->left; + bin_tree_t *op, *cls, *tree1, *tree; + + if (preg->no_sub + /* We do not optimize empty subexpressions, because otherwise we may + have bad CONCAT nodes with NULL children. This is obviously not + very common, so we do not lose much. An example that triggers + this case is the sed "script" /\(\)/x. */ + && node->left != NULL + && (node->token.opr.idx >= BITSET_WORD_BITS + || !(dfa->used_bkref_map + & ((bitset_word_t) 1 << node->token.opr.idx)))) + return node->left; + + /* Convert the SUBEXP node to the concatenation of an + OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ + op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); + cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); + tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; + tree = create_tree (dfa, op, tree1, CONCAT); + if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + + op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; + op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; + return tree; +} + +/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton + nodes. Requires a postorder visit. 
*/ +static reg_errcode_t +calc_first (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + if (node->token.type == CONCAT) + { + node->first = node->left->first; + node->node_idx = node->left->node_idx; + } + else + { + node->first = node; + node->node_idx = re_dfa_add_node (dfa, node->token); + if (BE (node->node_idx == -1, 0)) + return REG_ESPACE; + if (node->token.type == ANCHOR) + dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; + } + return REG_NOERROR; +} + +/* Pass 2: compute NEXT on the tree. Preorder visit. */ +static reg_errcode_t +calc_next (void *extra, bin_tree_t *node) +{ + switch (node->token.type) + { + case OP_DUP_ASTERISK: + node->left->next = node; + break; + case CONCAT: + node->left->next = node->right->first; + node->right->next = node->next; + break; + default: + if (node->left) + node->left->next = node->next; + if (node->right) + node->right->next = node->next; + break; + } + return REG_NOERROR; +} + +/* Pass 3: link all DFA nodes to their NEXT node (any order will do). 
*/ +static reg_errcode_t +link_nfa_nodes (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + int idx = node->node_idx; + reg_errcode_t err = REG_NOERROR; + + switch (node->token.type) + { + case CONCAT: + break; + + case END_OF_RE: + assert (node->next == NULL); + break; + + case OP_DUP_ASTERISK: + case OP_ALT: + { + int left, right; + dfa->has_plural_match = 1; + if (node->left != NULL) + left = node->left->first->node_idx; + else + left = node->next->node_idx; + if (node->right != NULL) + right = node->right->first->node_idx; + else + right = node->next->node_idx; + assert (left > -1); + assert (right > -1); + err = re_node_set_init_2 (dfa->edests + idx, left, right); + } + break; + + case ANCHOR: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); + break; + + case OP_BACK_REF: + dfa->nexts[idx] = node->next->node_idx; + if (node->token.type == OP_BACK_REF) + err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); + break; + + default: + assert (!IS_EPSILON_NODE (node->token.type)); + dfa->nexts[idx] = node->next->node_idx; + break; + } + + return err; +} + +/* Duplicate the epsilon closure of the node ROOT_NODE. + Note that duplicated nodes have constraint INIT_CONSTRAINT in addition + to their own constraint. */ + +static reg_errcode_t +internal_function +duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, + int root_node, unsigned int init_constraint) +{ + int org_node, clone_node, ret; + unsigned int constraint = init_constraint; + for (org_node = top_org_node, clone_node = top_clone_node;;) + { + int org_dest, clone_dest; + if (dfa->nodes[org_node].type == OP_BACK_REF) + { + /* If the back reference epsilon-transit, its destination must + also have the constraint. Then duplicate the epsilon closure + of the destination of the back reference, and store it in + edests of the back reference. 
*/ + org_dest = dfa->nexts[org_node]; + re_node_set_empty (dfa->edests + clone_node); + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + dfa->nexts[clone_node] = dfa->nexts[org_node]; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else if (dfa->edests[org_node].nelem == 0) + { + /* In case of the node can't epsilon-transit, don't duplicate the + destination and store the original destination as the + destination of the node. */ + dfa->nexts[clone_node] = dfa->nexts[org_node]; + break; + } + else if (dfa->edests[org_node].nelem == 1) + { + /* In case of the node can epsilon-transit, and it has only one + destination. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* If the node is root_node itself, it means the epsilon clsoure + has a loop. Then tie it to the destination of the root_node. */ + if (org_node == root_node && clone_node != org_node) + { + ret = re_node_set_insert (dfa->edests + clone_node, org_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + break; + } + /* In case of the node has another constraint, add it. */ + constraint |= dfa->nodes[org_node].constraint; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else /* dfa->edests[org_node].nelem == 2 */ + { + /* In case of the node can epsilon-transit, and it has two + destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* Search for a duplicated node which satisfies the constraint. */ + clone_dest = search_duplicated_node (dfa, org_dest, constraint); + if (clone_dest == -1) + { + /* There is no such duplicated node, create a new one. 
*/ + reg_errcode_t err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + err = duplicate_node_closure (dfa, org_dest, clone_dest, + root_node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + { + /* There is a duplicated node which satisfies the constraint, + use it to avoid infinite loop. */ + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + + org_dest = dfa->edests[org_node].elems[1]; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + org_node = org_dest; + clone_node = clone_dest; + } + return REG_NOERROR; +} + +/* Search for a node which is duplicated from the node ORG_NODE, and + satisfies the constraint CONSTRAINT. */ + +static int +search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint) +{ + int idx; + for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) + { + if (org_node == dfa->org_indices[idx] + && constraint == dfa->nodes[idx].constraint) + return idx; /* Found. */ + } + return -1; /* Not found. */ +} + +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + Return the index of the new node, or -1 if insufficient storage is + available. */ + +static int +duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) +{ + int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); + if (BE (dup_idx != -1, 1)) + { + dfa->nodes[dup_idx].constraint = constraint; + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; + dfa->nodes[dup_idx].duplicated = 1; + + /* Store the index of the original node. 
*/ + dfa->org_indices[dup_idx] = org_idx; + } + return dup_idx; +} + +static reg_errcode_t +calc_inveclosure (re_dfa_t *dfa) +{ + int src, idx, ret; + for (idx = 0; idx < dfa->nodes_len; ++idx) + re_node_set_init_empty (dfa->inveclosures + idx); + + for (src = 0; src < dfa->nodes_len; ++src) + { + int *elems = dfa->eclosures[src].elems; + for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) + { + ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + } + + return REG_NOERROR; +} + +/* Calculate "eclosure" for all the node in DFA. */ + +static reg_errcode_t +calc_eclosure (re_dfa_t *dfa) +{ + int node_idx, incomplete; +#ifdef DEBUG + assert (dfa->nodes_len > 0); +#endif + incomplete = 0; + /* For each nodes, calculate epsilon closure. */ + for (node_idx = 0; ; ++node_idx) + { + reg_errcode_t err; + re_node_set eclosure_elem; + if (node_idx == dfa->nodes_len) + { + if (!incomplete) + break; + incomplete = 0; + node_idx = 0; + } + +#ifdef DEBUG + assert (dfa->eclosures[node_idx].nelem != -1); +#endif + + /* If we have already calculated, skip it. */ + if (dfa->eclosures[node_idx].nelem != 0) + continue; + /* Calculate epsilon closure of `node_idx'. */ + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (dfa->eclosures[node_idx].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + return REG_NOERROR; +} + +/* Calculate epsilon closure of NODE. */ + +static reg_errcode_t +calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) +{ + reg_errcode_t err; + int i; + re_node_set eclosure; + int ret; + int incomplete = 0; + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* This indicates that we are calculating this node now. + We reference this value to avoid infinite loop. 
*/ + dfa->eclosures[node].nelem = -1; + + /* If the current node has constraints, duplicate all nodes + since they must inherit the constraints. */ + if (dfa->nodes[node].constraint + && dfa->edests[node].nelem + && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) + { + err = duplicate_node_closure (dfa, node, node, node, + dfa->nodes[node].constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Expand each epsilon destination nodes. */ + if (IS_EPSILON_NODE(dfa->nodes[node].type)) + for (i = 0; i < dfa->edests[node].nelem; ++i) + { + re_node_set eclosure_elem; + int edest = dfa->edests[node].elems[i]; + /* If calculating the epsilon closure of `edest' is in progress, + return intermediate result. */ + if (dfa->eclosures[edest].nelem == -1) + { + incomplete = 1; + continue; + } + /* If we haven't calculated the epsilon closure of `edest' yet, + calculate now. Otherwise use calculated epsilon closure. */ + if (dfa->eclosures[edest].nelem == 0) + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + eclosure_elem = dfa->eclosures[edest]; + /* Merge the epsilon closure of `edest'. */ + err = re_node_set_merge (&eclosure, &eclosure_elem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* If the epsilon closure of `edest' is incomplete, + the epsilon closure of this node is also incomplete. */ + if (dfa->eclosures[edest].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + + /* An epsilon closure includes itself. */ + ret = re_node_set_insert (&eclosure, node); + if (BE (ret < 0, 0)) + return REG_ESPACE; + if (incomplete && !root) + dfa->eclosures[node].nelem = 0; + else + dfa->eclosures[node] = eclosure; + *new_set = eclosure; + return REG_NOERROR; +} + +/* Functions for token which are used in the parser. */ + +/* Fetch a token from INPUT. + We must not use this function inside bracket expressions. 
*/ + +static void +internal_function +fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) +{ + re_string_skip_bytes (input, peek_token (result, input, syntax)); +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function inside bracket expressions. */ + +static int +internal_function +peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + + c = re_string_peek_byte (input, 0); + token->opr.c = c; + + token->word_char = 0; +#ifdef RE_ENABLE_I18N + token->mb_partial = 0; + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + token->mb_partial = 1; + return 1; + } +#endif + if (c == '\\') + { + unsigned char c2; + if (re_string_cur_idx (input) + 1 >= re_string_length (input)) + { + token->type = BACK_SLASH; + return 1; + } + + c2 = re_string_peek_byte_case (input, 1); + token->opr.c = c2; + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, + re_string_cur_idx (input) + 1); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (c2) != 0; + + switch (c2) + { + case '|': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (!(syntax & RE_NO_BK_REFS)) + { + token->type = OP_BACK_REF; + token->opr.idx = c2 - '1'; + } + break; + case '<': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_FIRST; + } + break; + case '>': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_LAST; + } + break; + case 'b': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = 
WORD_DELIM; + } + break; + case 'B': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = NOT_WORD_DELIM; + } + break; + case 'w': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_WORD; + break; + case 'W': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTWORD; + break; + case 's': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_SPACE; + break; + case 'S': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTSPACE; + break; + case '`': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_FIRST; + } + break; + case '\'': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_LAST; + } + break; + case '(': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_CLOSE_SUBEXP; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_CLOSE_DUP_NUM; + break; + default: + break; + } + return 2; + } + + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (token->opr.c); + + switch (c) + { + case '\n': + if (syntax & RE_NEWLINE_ALT) + token->type = OP_ALT; + break; + case '|': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '*': + token->type = OP_DUP_ASTERISK; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & 
RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_CLOSE_DUP_NUM; + break; + case '(': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_CLOSE_SUBEXP; + break; + case '[': + token->type = OP_OPEN_BRACKET; + break; + case '.': + token->type = OP_PERIOD; + break; + case '^': + if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && + re_string_cur_idx (input) != 0) + { + char prev = re_string_peek_byte (input, -1); + if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_FIRST; + break; + case '$': + if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && + re_string_cur_idx (input) + 1 != re_string_length (input)) + { + re_token_t next; + re_string_skip_bytes (input, 1); + peek_token (&next, input, syntax); + re_string_skip_bytes (input, -1); + if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_LAST; + break; + default: + break; + } + return 1; +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function out of bracket expressions. 
*/ + +static int +internal_function +peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + c = re_string_peek_byte (input, 0); + token->opr.c = c; + +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + return 1; + } +#endif /* RE_ENABLE_I18N */ + + if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) + && re_string_cur_idx (input) + 1 < re_string_length (input)) + { + /* In this case, '\' escape a character. */ + unsigned char c2; + re_string_skip_bytes (input, 1); + c2 = re_string_peek_byte (input, 0); + token->opr.c = c2; + token->type = CHARACTER; + return 1; + } + if (c == '[') /* '[' is a special char in a bracket exps. */ + { + unsigned char c2; + int token_len; + if (re_string_cur_idx (input) + 1 < re_string_length (input)) + c2 = re_string_peek_byte (input, 1); + else + c2 = 0; + token->opr.c = c2; + token_len = 2; + switch (c2) + { + case '.': + token->type = OP_OPEN_COLL_ELEM; + break; + case '=': + token->type = OP_OPEN_EQUIV_CLASS; + break; + case ':': + if (syntax & RE_CHAR_CLASSES) + { + token->type = OP_OPEN_CHAR_CLASS; + break; + } + /* else fall through. */ + default: + token->type = CHARACTER; + token->opr.c = c; + token_len = 1; + break; + } + return token_len; + } + switch (c) + { + case '-': + token->type = OP_CHARSET_RANGE; + break; + case ']': + token->type = OP_CLOSE_BRACKET; + break; + case '^': + token->type = OP_NON_MATCH_LIST; + break; + default: + token->type = CHARACTER; + } + return 1; +} + +/* Functions for parser. */ + +/* Entry point of the parser. + Parse the regular expression REGEXP and return the structure tree. + If an error is occured, ERR is set by error code, and return NULL. 
+ This function build the following tree, from regular expression <reg_exp>: + CAT + / \ + / \ + <reg_exp> EOR + + CAT means concatenation. + EOR means end of regular expression. */ + +static bin_tree_t * +parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, + reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *eor, *root; + re_token_t current_token; + dfa->syntax = syntax; + fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); + tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + eor = create_tree (dfa, NULL, NULL, END_OF_RE); + if (tree != NULL) + root = create_tree (dfa, tree, eor, CONCAT); + else + root = eor; + if (BE (eor == NULL || root == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + return root; +} + +/* This function build the following tree, from regular expression + <branch1>|<branch2>: + ALT + / \ + / \ + <branch1> <branch2> + + ALT means alternative, which represents the operator `|'. 
*/ + +static bin_tree_t * +parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *branch = NULL; + tree = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type == OP_ALT) + { + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + if (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + branch = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && branch == NULL, 0)) + return NULL; + } + else + branch = NULL; + tree = create_tree (dfa, tree, branch, OP_ALT); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + return tree; +} + +/* This function build the following tree, from regular expression + <exp1><exp2>: + CAT + / \ + / \ + <exp1> <exp2> + + CAT means concatenation. */ + +static bin_tree_t * +parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + bin_tree_t *tree, *exp; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + tree = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + exp = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && exp == NULL, 0)) + { + return NULL; + } + if (tree != NULL && exp != NULL) + { + tree = create_tree (dfa, tree, exp, CONCAT); + if (tree == NULL) + { + *err = REG_ESPACE; + return NULL; + } + } + else if (tree == NULL) + tree = exp; + /* Otherwise exp == NULL, we don't need to create new tree. 
*/ + } + return tree; +} + +/* This function build the following tree, from regular expression a*: + * + | + a +*/ + +static bin_tree_t * +parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + switch (token->type) + { + case CHARACTER: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (!re_string_eoi (regexp) + && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) + { + bin_tree_t *mbc_remain; + fetch_token (token, regexp, syntax); + mbc_remain = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree, mbc_remain, CONCAT); + if (BE (mbc_remain == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + } +#endif + break; + case OP_OPEN_SUBEXP: + tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_OPEN_BRACKET: + tree = parse_bracket_exp (regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_BACK_REF: + if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) + { + *err = REG_ESUBREG; + return NULL; + } + dfa->used_bkref_map |= 1 << token->opr.idx; + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + ++dfa->nbackref; + dfa->has_mb_node = 1; + break; + case OP_OPEN_DUP_NUM: + if (syntax & RE_CONTEXT_INVALID_DUP) + { + *err = REG_BADRPT; + return NULL; + } + /* FALLTHROUGH */ + case OP_DUP_ASTERISK: + case OP_DUP_PLUS: + case OP_DUP_QUESTION: + if (syntax & RE_CONTEXT_INVALID_OPS) + { + *err = REG_BADRPT; + return NULL; + } + else if (syntax & RE_CONTEXT_INDEP_OPS) + { + fetch_token (token, regexp, syntax); 
+ return parse_expression (regexp, preg, token, syntax, nest, err); + } + /* else fall through */ + case OP_CLOSE_SUBEXP: + if ((token->type == OP_CLOSE_SUBEXP) && + !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) + { + *err = REG_ERPAREN; + return NULL; + } + /* else fall through */ + case OP_CLOSE_DUP_NUM: + /* We treat it as a normal character. */ + + /* Then we can these characters as normal characters. */ + token->type = CHARACTER; + /* mb_partial and word_char bits should be initialized already + by peek_token. */ + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + break; + case ANCHOR: + if ((token->opr.ctx_type + & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) + && dfa->word_ops_used == 0) + init_word_char (dfa); + if (token->opr.ctx_type == WORD_DELIM + || token->opr.ctx_type == NOT_WORD_DELIM) + { + bin_tree_t *tree_first, *tree_last; + if (token->opr.ctx_type == WORD_DELIM) + { + token->opr.ctx_type = WORD_FIRST; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = WORD_LAST; + } + else + { + token->opr.ctx_type = INSIDE_WORD; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = INSIDE_NOTWORD; + } + tree_last = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree_first, tree_last, OP_ALT); + if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + else + { + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + /* We must return here, since ANCHORs can't be followed + by repetition operators. + eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>", + it must not be "<ANCHOR(^)><REPEAT(*)>". 
*/ + fetch_token (token, regexp, syntax); + return tree; + case OP_PERIOD: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + if (dfa->mb_cur_max > 1) + dfa->has_mb_node = 1; + break; + case OP_WORD: + case OP_NOTWORD: + tree = build_charclass_op (dfa, regexp->trans, + "alnum", + "_", + token->type == OP_NOTWORD, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_SPACE: + case OP_NOTSPACE: + tree = build_charclass_op (dfa, regexp->trans, + "space", + "", + token->type == OP_NOTSPACE, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_ALT: + case END_OF_RE: + return NULL; + case BACK_SLASH: + *err = REG_EESCAPE; + return NULL; + default: + /* Must not happen? */ +#ifdef DEBUG + assert (0); +#endif + return NULL; + } + fetch_token (token, regexp, syntax); + + while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS + || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) + { + tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + /* In BRE consecutive duplications are not allowed. */ + if ((syntax & RE_CONTEXT_INVALID_DUP) + && (token->type == OP_DUP_ASTERISK + || token->type == OP_OPEN_DUP_NUM)) + { + *err = REG_BADRPT; + return NULL; + } + } + + return tree; +} + +/* This function build the following tree, from regular expression + (<reg_exp>): + SUBEXP + | + <reg_exp> +*/ + +static bin_tree_t * +parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + size_t cur_nsub; + cur_nsub = preg->re_nsub++; + + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + + /* The subexpression may be a null string. 
*/ + if (token->type == OP_CLOSE_SUBEXP) + tree = NULL; + else + { + tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); + if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) + *err = REG_EPAREN; + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + + if (cur_nsub <= '9' - '1') + dfa->completed_bkref_map |= 1 << cur_nsub; + + tree = create_tree (dfa, tree, NULL, SUBEXP); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + tree->token.opr.idx = cur_nsub; + return tree; +} + +/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ + +static bin_tree_t * +parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) +{ + bin_tree_t *tree = NULL, *old_tree = NULL; + int i, start, end, start_idx = re_string_cur_idx (regexp); +#ifndef RE_TOKEN_INIT_BUG + re_token_t start_token = *token; +#else + re_token_t start_token; + + memcpy ((void *) &start_token, (void *) token, sizeof start_token); +#endif + + if (token->type == OP_OPEN_DUP_NUM) + { + end = 0; + start = fetch_number (regexp, token, syntax); + if (start == -1) + { + if (token->type == CHARACTER && token->opr.c == ',') + start = 0; /* We treat "{,m}" as "{0,m}". */ + else + { + *err = REG_BADBR; /* <re>{} is invalid. */ + return NULL; + } + } + if (BE (start != -2, 1)) + { + /* We treat "{n}" as "{n,n}". */ + end = ((token->type == OP_CLOSE_DUP_NUM) ? start + : ((token->type == CHARACTER && token->opr.c == ',') + ? fetch_number (regexp, token, syntax) : -2)); + } + if (BE (start == -2 || end == -2, 0)) + { + /* Invalid sequence. */ + if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) + { + if (token->type == END_OF_RE) + *err = REG_EBRACE; + else + *err = REG_BADBR; + + return NULL; + } + + /* If the syntax bit is set, rollback. 
*/ + re_string_set_index (regexp, start_idx); + *token = start_token; + token->type = CHARACTER; + /* mb_partial and word_char bits should be already initialized by + peek_token. */ + return elem; + } + + if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0)) + { + /* First number greater than second. */ + *err = REG_BADBR; + return NULL; + } + } + else + { + start = (token->type == OP_DUP_PLUS) ? 1 : 0; + end = (token->type == OP_DUP_QUESTION) ? 1 : -1; + } + + fetch_token (token, regexp, syntax); + + if (BE (elem == NULL, 0)) + return NULL; + if (BE (start == 0 && end == 0, 0)) + { + postorder (elem, free_tree, NULL); + return NULL; + } + + /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ + if (BE (start > 0, 0)) + { + tree = elem; + for (i = 2; i <= start; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (start == end) + return tree; + + /* Duplicate ELEM before it is marked optional. */ + elem = duplicate_tree (elem, dfa); + old_tree = tree; + } + else + old_tree = NULL; + + if (elem->token.type == SUBEXP) + postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); + + tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + + /* This loop is actually executed only when end != -1, + to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have + already created the start+1-th copy. 
*/ + for (i = start + 2; i <= end; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + + tree = create_tree (dfa, tree, NULL, OP_ALT); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (old_tree) + tree = create_tree (dfa, old_tree, tree, CONCAT); + + return tree; + + parse_dup_op_espace: + *err = REG_ESPACE; + return NULL; +} + +/* Size of the names for collating symbol/equivalence_class/character_class. + I'm not sure, but maybe enough. */ +#define BRACKET_NAME_BUF_SIZE 32 + +#ifndef _LIBC + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) +# else /* not RE_ENABLE_I18N */ +build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, + bracket_elem_t *end_elem) +# endif /* not RE_ENABLE_I18N */ +{ + unsigned int start_ch, end_ch; + /* Equivalence Classes and Character Classes can't be a range start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + /* We can handle no multi character collating elements without libc + support. 
*/ + if (BE ((start_elem->type == COLL_SYM + && strlen ((char *) start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM + && strlen ((char *) end_elem->opr.name) > 1), 0)) + return REG_ECOLLATE; + +# ifdef RE_ENABLE_I18N + { + wchar_t wc; + wint_t start_wc; + wint_t end_wc; + wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + + start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); +#ifdef GAWK + /* + * Fedora Core 2, maybe others, have broken `btowc' that returns -1 + * for any value > 127. Sigh. Note that `start_ch' and `end_ch' are + * unsigned, so we don't have sign extension problems. + */ + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? start_ch : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? end_ch : end_elem->opr.wch); +#else + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? __btowc (start_ch) : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? __btowc (end_ch) : end_elem->opr.wch); +#endif + if (start_wc == WEOF || end_wc == WEOF) + return REG_ECOLLATE; + cmp_buf[0] = start_wc; + cmp_buf[4] = end_wc; + if (wcscoll (cmp_buf, cmp_buf + 4) > 0) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, for !_LIBC we have no collation elements: if the + character set is single byte, the single byte character set + that we build below suffices. parse_bracket_exp passes + no MBCSET if dfa->mb_cur_max == 1. */ + if (mbcset) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. 
*/ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. */ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; + } + + /* Build the table for single byte characters. */ + for (wc = 0; wc < SBC_MAX; ++wc) + { + cmp_buf[2] = wc; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + bitset_set (sbcset, wc); + } + } +# else /* not RE_ENABLE_I18N */ + { + unsigned int ch; + start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + if (start_ch > end_ch) + return REG_ERANGE; + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ++ch) + if (start_ch <= ch && ch <= end_ch) + bitset_set (sbcset, ch); + } +# endif /* not RE_ENABLE_I18N */ + return REG_NOERROR; +} +#endif /* not _LIBC */ + +#ifndef _LIBC +/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument since we may update it. 
*/ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) +# else /* not RE_ENABLE_I18N */ +build_collating_symbol (bitset_t sbcset, const unsigned char *name) +# endif /* not RE_ENABLE_I18N */ +{ + size_t name_len = strlen ((const char *) name); + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } +} +#endif /* not _LIBC */ + +/* This function parse bracket expression like "[abc]", "[a-c]", + "[[.a-a.]]" etc. */ + +static bin_tree_t * +parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err) +{ +#ifdef _LIBC + const unsigned char *collseqmb; + const char *collseqwc; + uint32_t nrules; + int32_t table_size; + const int32_t *symb_table; + const unsigned char *extra; + + /* Local function for parse_bracket_exp used in _LIBC environement. + Seek the collating symbol entry correspondings to NAME. + Return the index of the symbol in the SYMB_TABLE. */ + + auto inline int32_t + __attribute ((always_inline)) + seek_collating_symbol_entry (name, name_len) + const unsigned char *name; + size_t name_len; + { + int32_t hash = elem_hash ((const char *) name, name_len); + int32_t elem = hash % table_size; + if (symb_table[2 * elem] != 0) + { + int32_t second = hash % (table_size - 2) + 1; + + do + { + /* First compare the hashing value. */ + if (symb_table[2 * elem] == hash + /* Compare the length of the name. */ + && name_len == extra[symb_table[2 * elem + 1]] + /* Compare the name. */ + && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], + name_len) == 0) + { + /* Yep, this is the entry. */ + break; + } + + /* Next entry. */ + elem += second; + } + while (symb_table[2 * elem] != 0); + } + return elem; + } + + /* Local function for parse_bracket_exp used in _LIBC environment. 
+ Look up the collation sequence value of BR_ELEM. + Return the value if succeeded, UINT_MAX otherwise. */ + + auto inline unsigned int + __attribute ((always_inline)) + lookup_collation_sequence_value (br_elem) + bracket_elem_t *br_elem; + { + if (br_elem->type == SB_CHAR) + { + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + return collseqmb[br_elem->opr.ch]; + else + { + wint_t wc = __btowc (br_elem->opr.ch); + return __collseq_table_lookup (collseqwc, wc); + } + } + else if (br_elem->type == MB_CHAR) + { + if (nrules != 0) + return __collseq_table_lookup (collseqwc, br_elem->opr.wch); + } + else if (br_elem->type == COLL_SYM) + { + size_t sym_name_len = strlen ((char *) br_elem->opr.name); + if (nrules != 0) + { + int32_t elem, idx; + elem = seek_collating_symbol_entry (br_elem->opr.name, + sym_name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + /* Skip the byte sequence of the collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the multibyte collation sequence value. */ + idx += sizeof (unsigned int); + /* Skip the wide char sequence of the collating element. */ + idx += sizeof (unsigned int) * + (1 + *(unsigned int *) (extra + idx)); + /* Return the collation sequence value. */ + return *(unsigned int *) (extra + idx); + } + else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + { + /* No valid character. Match it as a single byte + character. */ + return collseqmb[br_elem->opr.name[0]]; + } + } + else if (sym_name_len == 1) + return collseqmb[br_elem->opr.name[0]]; + } + return UINT_MAX; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. 
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) + re_charset_t *mbcset; + int *range_alloc; + bitset_t sbcset; + bracket_elem_t *start_elem, *end_elem; + { + unsigned int ch; + uint32_t start_collseq; + uint32_t end_collseq; + + /* Equivalence Classes and Character Classes can't be a range + start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + start_collseq = lookup_collation_sequence_value (start_elem); + end_collseq = lookup_collation_sequence_value (end_elem); + /* Check start/end collation sequence values. */ + if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) + return REG_ECOLLATE; + if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, if we have no collation elements, and the character set + is single byte, the single byte character set that we + build below suffices. */ + if (nrules > 0 || dfa->mb_cur_max > 1) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + uint32_t *new_array_start; + uint32_t *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. 
*/ + new_nranges = 2 * mbcset->nranges + 1; + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_collseq; + mbcset->range_ends[mbcset->nranges++] = end_collseq; + } + + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ch++) + { + uint32_t ch_collseq; + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + ch_collseq = collseqmb[ch]; + else + ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); + if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) + bitset_set (sbcset, ch); + } + return REG_NOERROR; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument sinse we may update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) + re_charset_t *mbcset; + int *coll_sym_alloc; + bitset_t sbcset; + const unsigned char *name; + { + int32_t elem, idx; + size_t name_len = strlen ((const char *) name); + if (nrules != 0) + { + elem = seek_collating_symbol_entry (name, name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + } + else if (symb_table[2 * elem] == 0 && name_len == 1) + { + /* No valid character, treat it as a normal + character. 
*/ + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + else + return REG_ECOLLATE; + + /* Got valid collation sequence, add it as a new entry. */ + /* Check the space of the arrays. */ + if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->ncoll_syms is 0. */ + int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + /* Use realloc since mbcset->coll_syms is NULL + if *alloc == 0. */ + int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, + new_coll_sym_alloc); + if (BE (new_coll_syms == NULL, 0)) + return REG_ESPACE; + mbcset->coll_syms = new_coll_syms; + *coll_sym_alloc = new_coll_sym_alloc; + } + mbcset->coll_syms[mbcset->ncoll_syms++] = idx; + return REG_NOERROR; + } + else + { + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + } + } +#endif + + re_token_t br_token; + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; + int equiv_class_alloc = 0, char_class_alloc = 0; +#endif /* not RE_ENABLE_I18N */ + int non_match = 0; + bin_tree_t *work_tree; + int token_len; + int first_round = 1; +#ifdef _LIBC + collseqmb = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules) + { + /* + if (MB_CUR_MAX > 1) + */ + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); + symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_TABLEMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_EXTRAMB); + } +#endif + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL 
|| mbcset == NULL, 0)) +#else + if (BE (sbcset == NULL, 0)) +#endif /* RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_NON_MATCH_LIST) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + non_match = 1; + if (syntax & RE_HAT_LISTS_NOT_NEWLINE) + bitset_set (sbcset, '\n'); + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + } + + /* We treat the first ']' as a normal character. */ + if (token->type == OP_CLOSE_BRACKET) + token->type = CHARACTER; + + while (1) + { + bracket_elem_t start_elem, end_elem; + unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; + unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; + reg_errcode_t ret; + int token_len2 = 0, is_range_exp = 0; + re_token_t token2; + + start_elem.opr.name = start_name_buf; + ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, + syntax, first_round); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + first_round = 0; + + /* Get information about the next token. We need it in any case. */ + token_len = peek_token_bracket (token, regexp, syntax); + + /* Do not check for ranges if we know they are not allowed. */ + if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) + { + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CHARSET_RANGE) + { + re_string_skip_bytes (regexp, token_len); /* Skip '-'. 
*/ + token_len2 = peek_token_bracket (&token2, regexp, syntax); + if (BE (token2.type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token2.type == OP_CLOSE_BRACKET) + { + /* We treat the last '-' as a normal character. */ + re_string_skip_bytes (regexp, -token_len); + token->type = CHARACTER; + } + else + is_range_exp = 1; + } + } + + if (is_range_exp == 1) + { + end_elem.opr.name = end_name_buf; + ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, + dfa, syntax, 1); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + + token_len = peek_token_bracket (token, regexp, syntax); + +#ifdef _LIBC + *err = build_range_exp (sbcset, mbcset, &range_alloc, + &start_elem, &end_elem); +#else +# ifdef RE_ENABLE_I18N + *err = build_range_exp (sbcset, + dfa->mb_cur_max > 1 ? mbcset : NULL, + &range_alloc, &start_elem, &end_elem); +# else + *err = build_range_exp (sbcset, &start_elem, &end_elem); +# endif +#endif /* RE_ENABLE_I18N */ + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + } + else + { + switch (start_elem.type) + { + case SB_CHAR: + bitset_set (sbcset, start_elem.opr.ch); + break; +#ifdef RE_ENABLE_I18N + case MB_CHAR: + /* Check whether the array has enough space. */ + if (BE (mbchar_alloc == mbcset->nmbchars, 0)) + { + wchar_t *new_mbchars; + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nmbchars is 0. */ + mbchar_alloc = 2 * mbcset->nmbchars + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + new_mbchars = re_realloc (mbcset->mbchars, wchar_t, + mbchar_alloc); + if (BE (new_mbchars == NULL, 0)) + goto parse_bracket_exp_espace; + mbcset->mbchars = new_mbchars; + } + mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; + break; +#endif /* RE_ENABLE_I18N */ + case EQUIV_CLASS: + *err = build_equiv_class (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &equiv_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case COLL_SYM: + *err = build_collating_symbol (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &coll_sym_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case CHAR_CLASS: + *err = build_charclass (regexp->trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &char_class_alloc, +#endif /* RE_ENABLE_I18N */ + (const char *) start_elem.opr.name, syntax); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + default: + assert (0); + break; + } + } + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CLOSE_BRACKET) + break; + } + + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); + + if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes + || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes + || mbcset->non_match))) + { + bin_tree_t *mbc_tree; + int sbc_idx; + /* Build a tree for complex bracket. 
*/ + dfa->has_mb_node = 1; + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto parse_bracket_exp_espace; + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) + if (sbcset[sbc_idx]) + break; + /* If there are no bits set in sbcset, there is no point + of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ + if (sbc_idx < BITSET_WORDS) + { + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + + /* Then join them by ALT node. */ + work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + else + { + re_free (sbcset); + work_tree = mbc_tree; + } + } + else +#endif /* not RE_ENABLE_I18N */ + { +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + return work_tree; + + parse_bracket_exp_espace: + *err = REG_ESPACE; + parse_bracket_exp_free_return: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + return NULL; +} + +/* Parse an element in the bracket expression. 
*/ + +static reg_errcode_t +parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token, int token_len, re_dfa_t *dfa, + reg_syntax_t syntax, int accept_hyphen) +{ +#ifdef RE_ENABLE_I18N + int cur_char_size; + cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); + if (cur_char_size > 1) + { + elem->type = MB_CHAR; + elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); + re_string_skip_bytes (regexp, cur_char_size); + return REG_NOERROR; + } +#endif /* RE_ENABLE_I18N */ + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS + || token->type == OP_OPEN_EQUIV_CLASS) + return parse_bracket_symbol (elem, regexp, token); + if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) + { + /* A '-' must only appear as anything but a range indicator before + the closing bracket. Everything else is an error. */ + re_token_t token2; + (void) peek_token_bracket (&token2, regexp, syntax); + if (token2.type != OP_CLOSE_BRACKET) + /* The actual error value is not standardized since this whole + case is undefined. But ERANGE makes good sense. */ + return REG_ERANGE; + } + elem->type = SB_CHAR; + elem->opr.ch = token->opr.c; + return REG_NOERROR; +} + +/* Parse a bracket symbol in the bracket expression. Bracket symbols are + such as [:<character_class>:], [.<collating_element>.], and + [=<equivalent_class>=]. 
*/ + +static reg_errcode_t +parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token) +{ + unsigned char ch, delim = token->opr.c; + int i = 0; + if (re_string_eoi(regexp)) + return REG_EBRACK; + for (;; ++i) + { + if (i >= BRACKET_NAME_BUF_SIZE) + return REG_EBRACK; + if (token->type == OP_OPEN_CHAR_CLASS) + ch = re_string_fetch_byte_case (regexp); + else + ch = re_string_fetch_byte (regexp); + if (re_string_eoi(regexp)) + return REG_EBRACK; + if (ch == delim && re_string_peek_byte (regexp, 0) == ']') + break; + elem->opr.name[i] = ch; + } + re_string_skip_bytes (regexp, 1); + elem->opr.name[i] = '\0'; + switch (token->type) + { + case OP_OPEN_COLL_ELEM: + elem->type = COLL_SYM; + break; + case OP_OPEN_EQUIV_CLASS: + elem->type = EQUIV_CLASS; + break; + case OP_OPEN_CHAR_CLASS: + elem->type = CHAR_CLASS; + break; + default: + break; + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the equivalence class which is represented by NAME. + The result are written to MBCSET and SBCSET. + EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, + int *equiv_class_alloc, const unsigned char *name) +#else /* not RE_ENABLE_I18N */ +build_equiv_class (bitset_t sbcset, const unsigned char *name) +#endif /* not RE_ENABLE_I18N */ +{ +#ifdef _LIBC + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + const int32_t *table, *indirect; + const unsigned char *weights, *extra, *cp; + unsigned char char_buf[2]; + int32_t idx1, idx2; + unsigned int ch; + size_t len; + /* This #include defines a local function! */ +# include <locale/weight.h> + /* Calculate the index for equivalence class. 
*/ + cp = name; + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + idx1 = findidx (&cp); + if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) + /* This isn't a valid character. */ + return REG_ECOLLATE; + + /* Build single byte matcing table for this equivalence class. */ + char_buf[1] = (unsigned char) '\0'; + len = weights[idx1 & 0xffffff]; + for (ch = 0; ch < SBC_MAX; ++ch) + { + char_buf[0] = ch; + cp = char_buf; + idx2 = findidx (&cp); +/* + idx2 = table[ch]; +*/ + if (idx2 == 0) + /* This isn't a valid character. */ + continue; + /* Compare only if the length matches and the collation rule + index is the same. */ + if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)) + { + int cnt = 0; + + while (cnt <= len && + weights[(idx1 & 0xffffff) + 1 + cnt] + == weights[(idx2 & 0xffffff) + 1 + cnt]) + ++cnt; + + if (cnt > len) + bitset_set (sbcset, ch); + } + } + /* Check whether the array has enough space. */ + if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nequiv_classes is 0. */ + int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + /* Use realloc since the array is NULL if *alloc == 0. 
*/ + int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, + int32_t, + new_equiv_class_alloc); + if (BE (new_equiv_classes == NULL, 0)) + return REG_ESPACE; + mbcset->equiv_classes = new_equiv_classes; + *equiv_class_alloc = new_equiv_class_alloc; + } + mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; + } + else +#endif /* _LIBC */ + { + if (BE (strlen ((const char *) name) != 1, 0)) + return REG_ECOLLATE; + bitset_set (sbcset, *name); + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the character class which is represented by NAME. + The result are written to MBCSET and SBCSET. + CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, + const char *class_name, reg_syntax_t syntax) +#else /* not RE_ENABLE_I18N */ +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + const char *class_name, reg_syntax_t syntax) +#endif /* not RE_ENABLE_I18N */ +{ + int i; + + /* In case of REG_ICASE "upper" and "lower" match the both of + upper and lower cases. */ + if ((syntax & RE_ICASE) + && (strcmp (class_name, "upper") == 0 || strcmp (class_name, "lower") == 0)) + class_name = "alpha"; + +#ifdef RE_ENABLE_I18N + /* Check the space of the arrays. */ + if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nchar_classes is 0. */ + int new_char_class_alloc = 2 * mbcset->nchar_classes + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, + new_char_class_alloc); + if (BE (new_char_classes == NULL, 0)) + return REG_ESPACE; + mbcset->char_classes = new_char_classes; + *char_class_alloc = new_char_class_alloc; + } + mbcset->char_classes[mbcset->nchar_classes++] = __wctype (class_name); +#endif /* RE_ENABLE_I18N */ + +#define BUILD_CHARCLASS_LOOP(ctype_func) \ + do { \ + if (BE (trans != NULL, 0)) \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, trans[i]); \ + } \ + else \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, i); \ + } \ + } while (0) + + if (strcmp (class_name, "alnum") == 0) + BUILD_CHARCLASS_LOOP (isalnum); + else if (strcmp (class_name, "cntrl") == 0) + BUILD_CHARCLASS_LOOP (iscntrl); + else if (strcmp (class_name, "lower") == 0) + BUILD_CHARCLASS_LOOP (islower); + else if (strcmp (class_name, "space") == 0) + BUILD_CHARCLASS_LOOP (isspace); + else if (strcmp (class_name, "alpha") == 0) + BUILD_CHARCLASS_LOOP (isalpha); + else if (strcmp (class_name, "digit") == 0) + BUILD_CHARCLASS_LOOP (isdigit); + else if (strcmp (class_name, "print") == 0) + BUILD_CHARCLASS_LOOP (isprint); + else if (strcmp (class_name, "upper") == 0) + BUILD_CHARCLASS_LOOP (isupper); + else if (strcmp (class_name, "blank") == 0) +#ifndef GAWK + BUILD_CHARCLASS_LOOP (isblank); +#else + /* see comments above */ + BUILD_CHARCLASS_LOOP (is_blank); +#endif + else if (strcmp (class_name, "graph") == 0) + BUILD_CHARCLASS_LOOP (isgraph); + else if (strcmp (class_name, "punct") == 0) + BUILD_CHARCLASS_LOOP (ispunct); + else if (strcmp (class_name, "xdigit") == 0) + BUILD_CHARCLASS_LOOP (isxdigit); + else + return REG_ECTYPE; + + return REG_NOERROR; +} + +static bin_tree_t * +build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, + const char *class_name, + const char *extra, int non_match, + reg_errcode_t *err) +{ + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + 
re_charset_t *mbcset; + int alloc = 0; +#endif /* not RE_ENABLE_I18N */ + reg_errcode_t ret; + re_token_t br_token; + bin_tree_t *tree; + + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ + +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else /* not RE_ENABLE_I18N */ + if (BE (sbcset == NULL, 0)) +#endif /* not RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + if (non_match) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + } + + /* We don't care the syntax in this case. */ + ret = build_charclass (trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &alloc, +#endif /* RE_ENABLE_I18N */ + class_name, 0); + + if (BE (ret != REG_NOERROR, 0)) + { + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = ret; + return NULL; + } + /* \w match '_' also. */ + for (; *extra; extra++) + bitset_set (sbcset, *extra); + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); +#endif + + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (tree == NULL, 0)) + goto build_word_op_espace; + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + bin_tree_t *mbc_tree; + /* Build a tree for complex bracket. */ + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + dfa->has_mb_node = 1; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto build_word_op_espace; + /* Then join them by ALT node. 
*/ + tree = create_tree (dfa, tree, mbc_tree, OP_ALT); + if (BE (mbc_tree != NULL, 1)) + return tree; + } + else + { + free_charset (mbcset); + return tree; + } +#else /* not RE_ENABLE_I18N */ + return tree; +#endif /* not RE_ENABLE_I18N */ + + build_word_op_espace: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = REG_ESPACE; + return NULL; +} + +/* This is intended for the expressions like "a{1,3}". + Fetch a number from `input', and return the number. + Return -1, if the number field is empty like "{,1}". + Return -2, If an error is occured. */ + +static int +fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) +{ + int num = -1; + unsigned char c; + while (1) + { + fetch_token (token, input, syntax); + c = token->opr.c; + if (BE (token->type == END_OF_RE, 0)) + return -2; + if (token->type == OP_CLOSE_DUP_NUM || c == ',') + break; + num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) + ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); + num = (num > RE_DUP_MAX) ? -2 : num; + } + return num; +} + +#ifdef RE_ENABLE_I18N +static void +free_charset (re_charset_t *cset) +{ + re_free (cset->mbchars); +# ifdef _LIBC + re_free (cset->coll_syms); + re_free (cset->equiv_classes); + re_free (cset->range_starts); + re_free (cset->range_ends); +# endif + re_free (cset->char_classes); + re_free (cset); +} +#endif /* RE_ENABLE_I18N */ + +/* Functions for binary tree operation. */ + +/* Create a tree node. 
*/ + +static bin_tree_t * +create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + re_token_type_t type) +{ + re_token_t t; + t.type = type; + return create_token_tree (dfa, left, right, &t); +} + +static bin_tree_t * +create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + const re_token_t *token) +{ + bin_tree_t *tree; + if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) + { + bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); + + if (storage == NULL) + return NULL; + storage->next = dfa->str_tree_storage; + dfa->str_tree_storage = storage; + dfa->str_tree_storage_idx = 0; + } + tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; + + tree->parent = NULL; + tree->left = left; + tree->right = right; + tree->token = *token; + tree->token.duplicated = 0; + tree->token.opt_subexp = 0; + tree->first = NULL; + tree->next = NULL; + tree->node_idx = -1; + + if (left != NULL) + left->parent = tree; + if (right != NULL) + right->parent = tree; + return tree; +} + +/* Mark the tree SRC as an optional subexpression. + To be called from preorder or postorder. */ + +static reg_errcode_t +mark_opt_subexp (void *extra, bin_tree_t *node) +{ + int idx = (int) (long) extra; + if (node->token.type == SUBEXP && node->token.opr.idx == idx) + node->token.opt_subexp = 1; + + return REG_NOERROR; +} + +/* Free the allocated memory inside NODE. */ + +static void +free_token (re_token_t *node) +{ +#ifdef RE_ENABLE_I18N + if (node->type == COMPLEX_BRACKET && node->duplicated == 0) + free_charset (node->opr.mbcset); + else +#endif /* RE_ENABLE_I18N */ + if (node->type == SIMPLE_BRACKET && node->duplicated == 0) + re_free (node->opr.sbcset); +} + +/* Worker function for tree walking. Free the allocated memory inside NODE + and its children. */ + +static reg_errcode_t +free_tree (void *extra, bin_tree_t *node) +{ + free_token (&node->token); + return REG_NOERROR; +} + + +/* Duplicate the node SRC, and return new node. 
This is a preorder + visit similar to the one implemented by the generic visitor, but + we need more infrastructure to maintain two parallel trees --- so, + it's easier to duplicate. */ + +static bin_tree_t * +duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) +{ + const bin_tree_t *node; + bin_tree_t *dup_root; + bin_tree_t **p_new = &dup_root, *dup_node = root->parent; + + for (node = root; ; ) + { + /* Create a new tree and link it back to the current parent. */ + *p_new = create_token_tree (dfa, NULL, NULL, &node->token); + if (*p_new == NULL) + return NULL; + (*p_new)->parent = dup_node; + (*p_new)->token.duplicated = 1; + dup_node = *p_new; + + /* Go to the left node, or up and to the right. */ + if (node->left) + { + node = node->left; + p_new = &dup_node->left; + } + else + { + const bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + dup_node = dup_node->parent; + if (!node) + return dup_root; + } + node = node->right; + p_new = &dup_node->right; + } + } +} diff --git a/compat/regex/regex.c b/compat/regex/regex.c index 556d8ab11f..3dd8dfa01f 100644 --- a/compat/regex/regex.c +++ b/compat/regex/regex.c @@ -1,4924 +1,87 @@ -/* Extended regular expression matching and search library, - version 0.12. - (Implements POSIX draft P10003.2/D11.2, except for - internationalization features.) +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - Copyright (C) 1993 Free Software Foundation, Inc. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
- This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* AIX requires this to be the first thing in the file. */ -#if defined (_AIX) && !defined (REGEX_MALLOC) - #pragma alloca -#endif - -#define _GNU_SOURCE - -/* We need this for `regex.h', and perhaps for the Emacs include files. */ -#include <sys/types.h> - -/* We used to test for `BSTRING' here, but only GCC and Emacs define - `BSTRING', as far as I know, and neither of them use this code. */ -#include <string.h> -#ifndef bcmp -#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) -#endif -#ifndef bcopy -#define bcopy(s, d, n) memcpy ((d), (s), (n)) -#endif -#ifndef bzero -#define bzero(s, n) memset ((s), 0, (n)) -#endif - -#include <stdlib.h> - - -/* Define the syntax stuff for \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -#ifndef Sword -#define Sword 1 -#endif - -#ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -#else /* not SYNTAX_TABLE */ - -/* How many characters in the character set. 
*/ -#define CHAR_SET_SIZE 256 - -static char re_syntax_table[CHAR_SET_SIZE]; - -static void -init_syntax_once () -{ - register int c; - static int done = 0; - - if (done) - return; - - bzero (re_syntax_table, sizeof re_syntax_table); - - for (c = 'a'; c <= 'z'; c++) - re_syntax_table[c] = Sword; - - for (c = 'A'; c <= 'Z'; c++) - re_syntax_table[c] = Sword; - - for (c = '0'; c <= '9'; c++) - re_syntax_table[c] = Sword; - - re_syntax_table['_'] = Sword; - - done = 1; -} - -#endif /* not SYNTAX_TABLE */ - -#define SYNTAX(c) re_syntax_table[c] - - -/* Get the interface, including the syntax bits. */ -#include "regex.h" - -/* isalpha etc. are used for the character classes. */ -#include <ctype.h> - -#ifndef isascii -#define isascii(c) 1 -#endif - -#ifdef isblank -#define ISBLANK(c) (isascii (c) && isblank (c)) -#else -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -#define ISGRAPH(c) (isascii (c) && isgraph (c)) -#else -#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) -#endif - -#define ISPRINT(c) (isascii (c) && isprint (c)) -#define ISDIGIT(c) (isascii (c) && isdigit (c)) -#define ISALNUM(c) (isascii (c) && isalnum (c)) -#define ISALPHA(c) (isascii (c) && isalpha (c)) -#define ISCNTRL(c) (isascii (c) && iscntrl (c)) -#define ISLOWER(c) (isascii (c) && islower (c)) -#define ISPUNCT(c) (isascii (c) && ispunct (c)) -#define ISSPACE(c) (isascii (c) && isspace (c)) -#define ISUPPER(c) (isascii (c) && isupper (c)) -#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) - -#ifndef NULL -#define NULL 0 -#endif - -/* We remove any previous definition of `SIGN_EXTEND_CHAR', - since ours (we hope) works properly with all combinations of - machines, compilers, `char' and `unsigned char' argument types. - (Per Bothner suggested the basic approach.) */ -#undef SIGN_EXTEND_CHAR -#if __STDC__ -#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -#else /* not __STDC__ */ -/* As in Harbison and Steele. 
*/ -#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -#endif - -/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we - use `alloca' instead of `malloc'. This is because using malloc in - re_search* or re_match* could cause memory leaks when C-g is used in - Emacs; also, malloc is slower and causes storage fragmentation. On - the other hand, malloc is more portable, and easier to debug. - - Because we sometimes use alloca, some routines have to be macros, - not functions -- `alloca'-allocated space disappears at the end of the - function it is called in. */ - -#ifdef REGEX_MALLOC - -#define REGEX_ALLOCATE malloc -#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) - -#else /* not REGEX_MALLOC */ - -/* Emacs already defines alloca, sometimes. */ -#ifndef alloca - -/* Make alloca work the best possible way. */ -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not __GNUC__ */ -#if HAVE_ALLOCA_H -#include <alloca.h> -#else /* not __GNUC__ or HAVE_ALLOCA_H */ -#ifndef _AIX /* Already did AIX, up at the top. */ -char *alloca (); -#endif /* not _AIX */ -#endif /* not HAVE_ALLOCA_H */ -#endif /* not __GNUC__ */ - -#endif /* not alloca */ - -#define REGEX_ALLOCATE alloca - -/* Assumes a `char *destination' variable. */ -#define REGEX_REALLOCATE(source, osize, nsize) \ - (destination = (char *) alloca (nsize), \ - bcopy (source, destination, osize), \ - destination) - -#endif /* not REGEX_MALLOC */ - - -/* True if `size1' is non-NULL and PTR is pointing anywhere inside - `string1' or just past its end. This works if PTR is NULL, which is - a good thing. */ -#define FIRST_STRING_P(ptr) \ - (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) - -/* (Re)Allocate N items of type T using malloc, or fail. 
*/ -#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) - -#define BYTEWIDTH 8 /* In bits. */ - -#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) - -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -typedef char boolean; -#define false 0 -#define true 1 - -/* These are the command codes that appear in compiled regular - expressions. Some opcodes are followed by argument bytes. A - command code can specify any interpretation whatsoever for its - arguments. Zero bytes may appear in the compiled regular expression. - - The value of `exactn' is needed in search.c (search_buffer) in Emacs. - So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of - `exactn' we use here must also be 1. */ - -typedef enum -{ - no_op = 0, - - /* Followed by one byte giving n, then by n literal bytes. */ - exactn = 1, - - /* Matches any (more or less) character. */ - anychar, - - /* Matches any one char belonging to specified set. First - following byte is number of bitmap bytes. Then come bytes - for a bitmap saying which chars are in. Bits in each byte - are ordered low-bit-first. A character is in the set if its - bit is 1. A character too large to have a bit in the map is - automatically not in the set. */ - charset, - - /* Same parameters as charset, but match any character that is - not one of those specified. */ - charset_not, - - /* Start remembering the text that is matched, for storing in a - register. Followed by one byte with the register number, in - the range 0 to one less than the pattern buffer's re_nsub - field. Then followed by one byte with the number of groups - inner to this one. (This last has to be part of the - start_memory only because we need it in the on_failure_jump - of re_match_2.) 
*/ - start_memory, - - /* Stop remembering the text that is matched and store it in a - memory register. Followed by one byte with the register - number, in the range 0 to one less than `re_nsub' in the - pattern buffer, and one byte with the number of inner groups, - just like `start_memory'. (We need the number of inner - groups here because we don't have any easy way of finding the - corresponding start_memory when we're at a stop_memory.) */ - stop_memory, - - /* Match a duplicate of something remembered. Followed by one - byte containing the register number. */ - duplicate, - - /* Fail unless at beginning of line. */ - begline, - - /* Fail unless at end of line. */ - endline, - - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). */ - begbuf, - - /* Analogously, for end of buffer/string. */ - endbuf, - - /* Followed by two byte relative address to which to jump. */ - jump, - - /* Same as jump, but marks the end of an alternative. */ - jump_past_alt, - - /* Followed by two-byte relative address of place to resume at - in case of failure. */ - on_failure_jump, - - /* Like on_failure_jump, but pushes a placeholder instead of the - current string position when executed. */ - on_failure_keep_string_jump, - - /* Throw away latest failure point and then jump to following - two-byte relative address. */ - pop_failure_jump, - - /* Change to pop_failure_jump if know won't have to backtrack to - match; otherwise change to jump. This is used to jump - back to the beginning of a repeat. If what follows this jump - clearly won't match what the repeat does, such that we can be - sure that there is no use backtracking out of repetitions - already matched, then we change it to a pop_failure_jump. - Followed by two-byte address. */ - maybe_pop_jump, - - /* Jump to following two-byte address, and push a dummy failure - point. This failure point will be thrown away if an attempt - is made to use it for a failure. 
A `+' construct makes this - before the first repeat. Also used as an intermediary kind - of jump when compiling an alternative. */ - dummy_failure_jump, - - /* Push a dummy failure point and continue. Used at the end of - alternatives. */ - push_dummy_failure, - - /* Followed by two-byte relative address and two-byte number n. - After matching N times, jump to the address upon failure. */ - succeed_n, - - /* Followed by two-byte relative address, and two-byte number n. - Jump to the address N times, then fail. */ - jump_n, - - /* Set the following two-byte relative address to the - subsequent two-byte number. The address *includes* the two - bytes of number. */ - set_number_at, - - wordchar, /* Matches any word-constituent character. */ - notwordchar, /* Matches any char that is not a word-constituent. */ - - wordbeg, /* Succeeds if at word beginning. */ - wordend, /* Succeeds if at word end. */ - - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ - -#ifdef emacs - ,before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. */ - - /* Matches any character whose syntax is specified. Followed by - a byte which contains a syntax code, e.g., Sword. */ - syntaxspec, - - /* Matches any character whose syntax is not that specified. */ - notsyntaxspec -#endif /* emacs */ -} re_opcode_t; - -/* Common operations on the compiled pattern. */ - -/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ - -#define STORE_NUMBER(destination, number) \ - do { \ - (destination)[0] = (number) & 0377; \ - (destination)[1] = (number) >> 8; \ - } while (0) - -/* Same as STORE_NUMBER, except increment DESTINATION to - the byte after where the number is stored. Therefore, DESTINATION - must be an lvalue. 
*/ - -#define STORE_NUMBER_AND_INCR(destination, number) \ - do { \ - STORE_NUMBER (destination, number); \ - (destination) += 2; \ - } while (0) - -/* Put into DESTINATION a number stored in two contiguous bytes starting - at SOURCE. */ - -#define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source) & 0377; \ - (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ - } while (0) - -#ifdef DEBUG -static void -extract_number (dest, source) - int *dest; - unsigned char *source; -{ - int temp = SIGN_EXTEND_CHAR (*(source + 1)); - *dest = *source & 0377; - *dest += temp << 8; -} - -#ifndef EXTRACT_MACROS /* To debug the macros. */ -#undef EXTRACT_NUMBER -#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. - SOURCE must be an lvalue. */ - -#define EXTRACT_NUMBER_AND_INCR(destination, source) \ - do { \ - EXTRACT_NUMBER (destination, source); \ - (source) += 2; \ - } while (0) - -#ifdef DEBUG -static void -extract_number_and_incr (destination, source) - int *destination; - unsigned char **source; -{ - extract_number (destination, *source); - *source += 2; -} - -#ifndef EXTRACT_MACROS -#undef EXTRACT_NUMBER_AND_INCR -#define EXTRACT_NUMBER_AND_INCR(dest, src) \ - extract_number_and_incr (&dest, &src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* If DEBUG is defined, Regex prints many voluminous messages about what - it is doing (if the variable `debug' is nonzero). If linked with the - main program in `iregex.c', you can enter patterns and strings - interactively. And if linked with the main program in `main.c' and - the other test files, you can run the already-written tests. */ - -#ifdef DEBUG - -/* We use standard I/O for debugging. */ -#include <stdio.h> - -/* It is useful to test things that ``must'' be true when debugging. 
*/ -#include <assert.h> - -static int debug = 0; - -#define DEBUG_STATEMENT(e) e -#define DEBUG_PRINT1(x) if (debug) printf (x) -#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) print_partial_compiled_pattern (s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) print_double_string (w, s1, sz1, s2, sz2) - - -extern void printchar (); - -/* Print the fastmap in human-readable form. */ - -void -print_fastmap (fastmap) - char *fastmap; -{ - unsigned was_a_range = 0; - unsigned i = 0; - - while (i < (1 << BYTEWIDTH)) - { - if (fastmap[i++]) - { - was_a_range = 0; - printchar (i - 1); - while (i < (1 << BYTEWIDTH) && fastmap[i]) - { - was_a_range = 1; - i++; - } - if (was_a_range) - { - printf ("-"); - printchar (i - 1); - } - } - } - putchar ('\n'); -} - - -/* Print a compiled pattern string in human-readable form, starting at - the START pointer into it and ending just before the pointer END. */ - -void -print_partial_compiled_pattern (start, end) - unsigned char *start; - unsigned char *end; -{ - int mcnt, mcnt2; - unsigned char *p = start; - unsigned char *pend = end; - - if (start == NULL) - { - printf ("(null)\n"); - return; - } - - /* Loop over pattern commands. 
*/ - while (p < pend) - { - switch ((re_opcode_t) *p++) - { - case no_op: - printf ("/no_op"); - break; - - case exactn: - mcnt = *p++; - printf ("/exactn/%d", mcnt); - do - { - putchar ('/'); - printchar (*p++); - } - while (--mcnt); - break; - - case start_memory: - mcnt = *p++; - printf ("/start_memory/%d/%d", mcnt, *p++); - break; - - case stop_memory: - mcnt = *p++; - printf ("/stop_memory/%d/%d", mcnt, *p++); - break; - - case duplicate: - printf ("/duplicate/%d", *p++); - break; - - case anychar: - printf ("/anychar"); - break; - - case charset: - case charset_not: - { - register int c; - - printf ("/charset%s", - (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); - - assert (p + *p < pend); - - for (c = 0; c < *p; c++) - { - unsigned bit; - unsigned char map_byte = p[1 + c]; - - putchar ('/'); - - for (bit = 0; bit < BYTEWIDTH; bit++) - if (map_byte & (1 << bit)) - printchar (c * BYTEWIDTH + bit); - } - p += 1 + *p; - break; - } - - case begline: - printf ("/begline"); - break; - - case endline: - printf ("/endline"); - break; - - case on_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_jump/0/%d", mcnt); - break; - - case on_failure_keep_string_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_keep_string_jump/0/%d", mcnt); - break; - - case dummy_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/dummy_failure_jump/0/%d", mcnt); - break; - - case push_dummy_failure: - printf ("/push_dummy_failure"); - break; - - case maybe_pop_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/maybe_pop_jump/0/%d", mcnt); - break; - - case pop_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/pop_failure_jump/0/%d", mcnt); - break; - - case jump_past_alt: - extract_number_and_incr (&mcnt, &p); - printf ("/jump_past_alt/0/%d", mcnt); - break; - - case jump: - extract_number_and_incr (&mcnt, &p); - printf ("/jump/0/%d", mcnt); - break; - - case succeed_n: - extract_number_and_incr (&mcnt, 
&p); - extract_number_and_incr (&mcnt2, &p); - printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2); - break; - - case jump_n: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2); - break; - - case set_number_at: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2); - break; - - case wordbound: - printf ("/wordbound"); - break; - - case notwordbound: - printf ("/notwordbound"); - break; - - case wordbeg: - printf ("/wordbeg"); - break; - - case wordend: - printf ("/wordend"); - -#ifdef emacs - case before_dot: - printf ("/before_dot"); - break; - - case at_dot: - printf ("/at_dot"); - break; - - case after_dot: - printf ("/after_dot"); - break; - - case syntaxspec: - printf ("/syntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; - - case notsyntaxspec: - printf ("/notsyntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; -#endif /* emacs */ - - case wordchar: - printf ("/wordchar"); - break; - - case notwordchar: - printf ("/notwordchar"); - break; - - case begbuf: - printf ("/begbuf"); - break; - - case endbuf: - printf ("/endbuf"); - break; - - default: - printf ("?%d", *(p-1)); - } - } - printf ("/\n"); -} - - -void -print_compiled_pattern (bufp) - struct re_pattern_buffer *bufp; -{ - unsigned char *buffer = bufp->buffer; - - print_partial_compiled_pattern (buffer, buffer + bufp->used); - printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); - - if (bufp->fastmap_accurate && bufp->fastmap) - { - printf ("fastmap: "); - print_fastmap (bufp->fastmap); - } - - printf ("re_nsub: %d\t", bufp->re_nsub); - printf ("regs_alloc: %d\t", bufp->regs_allocated); - printf ("can_be_null: %d\t", bufp->can_be_null); - printf ("newline_anchor: %d\n", bufp->newline_anchor); - printf ("no_sub: %d\t", bufp->no_sub); - printf ("not_bol: %d\t", bufp->not_bol); - printf ("not_eol: %d\t", bufp->not_eol); - 
printf ("syntax: %d\n", bufp->syntax); - /* Perhaps we should print the translate table? */ -} - - -void -print_double_string (where, string1, size1, string2, size2) - const char *where; - const char *string1; - const char *string2; - int size1; - int size2; -{ - unsigned this_char; - - if (where == NULL) - printf ("(null)"); - else - { - if (FIRST_STRING_P (where)) - { - for (this_char = where - string1; this_char < size1; this_char++) - printchar (string1[this_char]); - - where = string2; - } - - for (this_char = where - string2; this_char < size2; this_char++) - printchar (string2[this_char]); - } -} - -#else /* not DEBUG */ - -#undef assert -#define assert(e) - -#define DEBUG_STATEMENT(e) -#define DEBUG_PRINT1(x) -#define DEBUG_PRINT2(x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) - -#endif /* not DEBUG */ - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; - return ret; -} - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. 
*/ - -static const char *re_error_msg[] = - { NULL, /* REG_NOERROR */ - "No match", /* REG_NOMATCH */ - "Invalid regular expression", /* REG_BADPAT */ - "Invalid collation character", /* REG_ECOLLATE */ - "Invalid character class name", /* REG_ECTYPE */ - "Trailing backslash", /* REG_EESCAPE */ - "Invalid back reference", /* REG_ESUBREG */ - "Unmatched [ or [^", /* REG_EBRACK */ - "Unmatched ( or \\(", /* REG_EPAREN */ - "Unmatched \\{", /* REG_EBRACE */ - "Invalid content of \\{\\}", /* REG_BADBR */ - "Invalid range end", /* REG_ERANGE */ - "Memory exhausted", /* REG_ESPACE */ - "Invalid preceding regular expression", /* REG_BADRPT */ - "Premature end of regular expression", /* REG_EEND */ - "Regular expression too big", /* REG_ESIZE */ - "Unmatched ) or \\)", /* REG_ERPAREN */ - }; - -/* Subroutine declarations and macros for regex_compile. */ - -static void store_op1 (), store_op2 (); -static void insert_op1 (), insert_op2 (); -static boolean at_begline_loc_p (), at_endline_loc_p (); -static boolean group_in_compile_stack (); -static reg_errcode_t compile_range (); - -/* Fetch the next character in the uncompiled pattern---translating it - if necessary. Also cast from a signed character in the constant - string passed to us by the user to an unsigned char that we can use - as an array index (in, e.g., `translate'). */ -#define PATFETCH(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - if (translate) c = translate[c]; \ - } while (0) - -/* Fetch the next character in the uncompiled pattern, with no - translation. */ -#define PATFETCH_RAW(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - } while (0) - -/* Go backwards one character in the pattern. */ -#define PATUNFETCH p-- - - -/* If `translate' is non-null, return translate[D], else just D. We - cast the subscript to translate because some data is declared as - `char *', to avoid warnings when a string constant is passed. 
But - when we use a character as a subscript we must make it unsigned. */ -#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) - - -/* Macros for outputting the compiled pattern into `buffer'. */ - -/* If the buffer isn't allocated when it comes in, use this. */ -#define INIT_BUF_SIZE 32 - -/* Make sure we have at least N more bytes of space in buffer. */ -#define GET_BUFFER_SPACE(n) \ - while (b - bufp->buffer + (n) > bufp->allocated) \ - EXTEND_BUFFER () - -/* Make sure we have one more byte of buffer space and then add C to it. */ -#define BUF_PUSH(c) \ - do { \ - GET_BUFFER_SPACE (1); \ - *b++ = (unsigned char) (c); \ - } while (0) - - -/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ -#define BUF_PUSH_2(c1, c2) \ - do { \ - GET_BUFFER_SPACE (2); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - } while (0) - - -/* As with BUF_PUSH_2, except for three bytes. */ -#define BUF_PUSH_3(c1, c2, c3) \ - do { \ - GET_BUFFER_SPACE (3); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - *b++ = (unsigned char) (c3); \ - } while (0) - - -/* Store a jump with opcode OP at LOC to location TO. We store a - relative address offset by the three bytes the jump itself occupies. */ -#define STORE_JUMP(op, loc, to) \ - store_op1 (op, loc, (to) - (loc) - 3) - -/* Likewise, for a two-argument jump. */ -#define STORE_JUMP2(op, loc, to, arg) \ - store_op2 (op, loc, (to) - (loc) - 3, arg) - -/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP(op, loc, to) \ - insert_op1 (op, loc, (to) - (loc) - 3, b) - -/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP2(op, loc, to, arg) \ - insert_op2 (op, loc, (to) - (loc) - 3, arg, b) - - -/* This is not an arbitrary limit: the arguments which represent offsets - into the pattern are two bytes long. 
So if 2^16 bytes turns out to - be too small, many things would have to change. */ -#define MAX_BUF_SIZE (1L << 16) - - -/* Extend the buffer by twice its current size via realloc and - reset the pointers that pointed into the old block to point to the - correct places in the new one. If extending the buffer results in it - being larger than MAX_BUF_SIZE, then flag memory exhausted. */ -#define EXTEND_BUFFER() \ - do { \ - unsigned char *old_buffer = bufp->buffer; \ - if (bufp->allocated == MAX_BUF_SIZE) \ - return REG_ESIZE; \ - bufp->allocated <<= 1; \ - if (bufp->allocated > MAX_BUF_SIZE) \ - bufp->allocated = MAX_BUF_SIZE; \ - bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ - if (bufp->buffer == NULL) \ - return REG_ESPACE; \ - /* If the buffer moved, move all the pointers into it. */ \ - if (old_buffer != bufp->buffer) \ - { \ - b = (b - old_buffer) + bufp->buffer; \ - begalt = (begalt - old_buffer) + bufp->buffer; \ - if (fixup_alt_jump) \ - fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ - if (laststart) \ - laststart = (laststart - old_buffer) + bufp->buffer; \ - if (pending_exact) \ - pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ - } \ - } while (0) - - -/* Since we have one byte reserved for the register number argument to - {start,stop}_memory, the maximum number of groups we can report - things about is what fits in that byte. */ -#define MAX_REGNUM 255 - -/* But patterns can have more than `MAX_REGNUM' registers. We just - ignore the excess. */ -typedef unsigned regnum_t; - - -/* Macros for the compile stack. */ - -/* Since offsets can go either forwards or backwards, this type needs to - be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. 
*/ -typedef int pattern_offset_t; - -typedef struct -{ - pattern_offset_t begalt_offset; - pattern_offset_t fixup_alt_jump; - pattern_offset_t inner_group_offset; - pattern_offset_t laststart_offset; - regnum_t regnum; -} compile_stack_elt_t; - - -typedef struct -{ - compile_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} compile_stack_type; - - -#define INIT_COMPILE_STACK_SIZE 32 - -#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) -#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) - -/* The next available element. */ -#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) - - -/* Set the bit for character C in a list. */ -#define SET_LIST_BIT(c) \ - (b[((unsigned char) (c)) / BYTEWIDTH] \ - |= 1 << (((unsigned char) c) % BYTEWIDTH)) - - -/* Get the next unsigned number in the uncompiled pattern. */ -#define GET_UNSIGNED_NUMBER(num) \ - { if (p != pend) \ - { \ - PATFETCH (c); \ - while (ISDIGIT (c)) \ - { \ - if (num < 0) \ - num = 0; \ - num = num * 10 + c - '0'; \ - if (p == pend) \ - break; \ - PATFETCH (c); \ - } \ - } \ - } - -#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ - -#define IS_CHAR_CLASS(string) \ - (STREQ (string, "alpha") || STREQ (string, "upper") \ - || STREQ (string, "lower") || STREQ (string, "digit") \ - || STREQ (string, "alnum") || STREQ (string, "xdigit") \ - || STREQ (string, "space") || STREQ (string, "print") \ - || STREQ (string, "punct") || STREQ (string, "graph") \ - || STREQ (string, "cntrl") || STREQ (string, "blank")) - -/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. - Returns one of error codes defined in `regex.h', or zero for success. - - Assumes the `allocated' (and perhaps `buffer') and `translate' - fields are set in BUFP on entry. 
- - If it succeeds, results are put in BUFP (if it returns an error, the - contents of BUFP are undefined): - `buffer' is the compiled pattern; - `syntax' is set to SYNTAX; - `used' is set to the length of the compiled pattern; - `fastmap_accurate' is zero; - `re_nsub' is the number of subexpressions in PATTERN; - `not_bol' and `not_eol' are zero; - - The `fastmap' and `newline_anchor' fields are neither - examined nor set. */ - -static reg_errcode_t -regex_compile (pattern, size, syntax, bufp) - const char *pattern; - int size; - reg_syntax_t syntax; - struct re_pattern_buffer *bufp; -{ - /* We fetch characters from PATTERN here. Even though PATTERN is - `char *' (i.e., signed), we declare these variables as unsigned, so - they can be reliably used as array indices. */ - register unsigned char c, c1; - - /* A random temporary spot in PATTERN. */ - const char *p1; - - /* Points to the end of the buffer, where we should append. */ - register unsigned char *b; - - /* Keeps track of unclosed groups. */ - compile_stack_type compile_stack; - - /* Points to the current (ending) position in the pattern. */ - const char *p = pattern; - const char *pend = pattern + size; - - /* How to translate the characters in the pattern. */ - char *translate = bufp->translate; - - /* Address of the count-byte of the most recently inserted `exactn' - command. This makes it possible to tell if a new exact-match - character can be added to that command or if the character requires - a new `exactn' command. */ - unsigned char *pending_exact = 0; - - /* Address of start of the most recently finished expression. - This tells, e.g., postfix * where to find the start of its - operand. Reset at the beginning of groups and alternatives. */ - unsigned char *laststart = 0; - - /* Address of beginning of regexp, or inside of last group. */ - unsigned char *begalt; - - /* Place in the uncompiled pattern (i.e., the {) to - which to go back if the interval is invalid. 
*/ - const char *beg_interval; - - /* Address of the place where a forward jump should go to the end of - the containing expression. Each alternative of an `or' -- except the - last -- ends with a forward jump of this sort. */ - unsigned char *fixup_alt_jump = 0; - - /* Counts open-groups as they are encountered. Remembered for the - matching close-group on the compile stack, so the same register - number is put in the stop_memory as the start_memory. */ - regnum_t regnum = 0; - -#ifdef DEBUG - DEBUG_PRINT1 ("\nCompiling pattern: "); - if (debug) - { - unsigned debug_count; - - for (debug_count = 0; debug_count < size; debug_count++) - printchar (pattern[debug_count]); - putchar ('\n'); - } -#endif /* DEBUG */ - - /* Initialize the compile stack. */ - compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); - if (compile_stack.stack == NULL) - return REG_ESPACE; - - compile_stack.size = INIT_COMPILE_STACK_SIZE; - compile_stack.avail = 0; - - /* Initialize the pattern buffer. */ - bufp->syntax = syntax; - bufp->fastmap_accurate = 0; - bufp->not_bol = bufp->not_eol = 0; - - /* Set `used' to zero, so that if we return an error, the pattern - printer (for debugging) will think there's no pattern. We reset it - at the end. */ - bufp->used = 0; - - /* Always count groups, whether or not bufp->no_sub is set. */ - bufp->re_nsub = 0; - -#if !defined (emacs) && !defined (SYNTAX_TABLE) - /* Initialize the syntax table. */ - init_syntax_once (); -#endif - - if (bufp->allocated == 0) - { - if (bufp->buffer) - { /* If zero allocated, but buffer is non-null, try to realloc - enough space. This loses if buffer's address is bogus, but - that is the user's responsibility. */ - RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); - } - else - { /* Caller did not allocate a buffer. Do it for them. 
*/ - bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); - } - if (!bufp->buffer) return REG_ESPACE; - - bufp->allocated = INIT_BUF_SIZE; - } - - begalt = b = bufp->buffer; - - /* Loop through the uncompiled pattern until we're at the end. */ - while (p != pend) - { - PATFETCH (c); - - switch (c) - { - case '^': - { - if ( /* If at start of pattern, it's an operator. */ - p == pattern + 1 - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's come before. */ - || at_begline_loc_p (pattern, p, syntax)) - BUF_PUSH (begline); - else - goto normal_char; - } - break; - - - case '$': - { - if ( /* If at end of pattern, it's an operator. */ - p == pend - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's next. */ - || at_endline_loc_p (p, pend, syntax)) - BUF_PUSH (endline); - else - goto normal_char; - } - break; - - - case '+': - case '?': - if ((syntax & RE_BK_PLUS_QM) - || (syntax & RE_LIMITED_OPS)) - goto normal_char; - handle_plus: - case '*': - /* If there is no previous pattern... */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; - else if (!(syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - } - - { - /* Are we optimizing this jump? */ - boolean keep_string_p = false; - - /* 1 means zero (many) matches is allowed. */ - char zero_times_ok = 0, many_times_ok = 0; - - /* If there is a sequence of repetition chars, collapse it - down to just one (the right one). We can't combine - interval operators with these because of, e.g., `a{2}*', - which should only match an even number of `a's. 
*/ - - for (;;) - { - zero_times_ok |= c != '+'; - many_times_ok |= c != '?'; - - if (p == pend) - break; - - PATFETCH (c); - - if (c == '*' - || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) - ; - - else if (syntax & RE_BK_PLUS_QM && c == '\\') - { - if (p == pend) return REG_EESCAPE; - - PATFETCH (c1); - if (!(c1 == '+' || c1 == '?')) - { - PATUNFETCH; - PATUNFETCH; - break; - } - - c = c1; - } - else - { - PATUNFETCH; - break; - } - - /* If we get here, we found another repeat character. */ - } - - /* Star, etc. applied to an empty pattern is equivalent - to an empty pattern. */ - if (!laststart) - break; - - /* Now we know whether or not zero matches is allowed - and also whether or not two or more matches is allowed. */ - if (many_times_ok) - { /* More than one repetition is allowed, so put in at the - end a backward relative jump from `b' to before the next - jump we're going to put in below (which jumps from - laststart to after this jump). - - But if we are at the `*' in the exact sequence `.*\n', - insert an unconditional jump backwards to the ., - instead of the beginning of the loop. This way we only - push a failure point once, instead of every time - through the loop. */ - assert (p - 1 > pattern); - - /* Allocate the space for the jump. */ - GET_BUFFER_SPACE (3); - - /* We know we are not at the first character of the pattern, - because laststart was nonzero. And we've already - incremented `p', by the way, to be the character after - the `*'. Do we have to do something analogous here - for null bytes, because of RE_DOT_NOT_NULL? */ - if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') - && zero_times_ok - && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') - && !(syntax & RE_DOT_NEWLINE)) - { /* We have .*\n. */ - STORE_JUMP (jump, b, laststart); - keep_string_p = true; - } - else - /* Anything else. */ - STORE_JUMP (maybe_pop_jump, b, laststart - 3); - - /* We've added more stuff to the buffer. 
*/ - b += 3; - } - - /* On failure, jump from laststart to b + 3, which will be the - end of the buffer after this jump is inserted. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump - : on_failure_jump, - laststart, b + 3); - pending_exact = 0; - b += 3; - - if (!zero_times_ok) - { - /* At least one repetition is required, so insert a - `dummy_failure_jump' before the initial - `on_failure_jump' instruction of the loop. This - effects a skip over that instruction the first time - we hit that loop. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); - b += 3; - } - } - break; - - - case '.': - laststart = b; - BUF_PUSH (anychar); - break; - - - case '[': - { - boolean had_char_class = false; - - if (p == pend) return REG_EBRACK; - - /* Ensure that we have enough space to push a charset: the - opcode, the length count, and the bitset; 34 bytes in all. */ - GET_BUFFER_SPACE (34); - - laststart = b; - - /* We test `*p == '^' twice, instead of using an if - statement, so we only need one BUF_PUSH. */ - BUF_PUSH (*p == '^' ? charset_not : charset); - if (*p == '^') - p++; - - /* Remember the first position in the bracket expression. */ - p1 = p; - - /* Push the number of bytes in the bitmap. */ - BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); - - /* Clear the whole map. */ - bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); - - /* charset_not matches newline according to a syntax bit. */ - if ((re_opcode_t) b[-2] == charset_not - && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) - SET_LIST_BIT ('\n'); - - /* Read in characters and ranges, setting map bits. */ - for (;;) - { - if (p == pend) return REG_EBRACK; - - PATFETCH (c); - - /* \ might escape characters inside [...] and [^...]. */ - if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') - { - if (p == pend) return REG_EESCAPE; - - PATFETCH (c1); - SET_LIST_BIT (c1); - continue; - } - - /* Could be the end of the bracket expression. 
If it's - not (i.e., when the bracket expression is `[]' so - far), the ']' character bit gets set way below. */ - if (c == ']' && p != p1 + 1) - break; - - /* Look ahead to see if it's a range when the last thing - was a character class. */ - if (had_char_class && c == '-' && *p != ']') - return REG_ERANGE; - - /* Look ahead to see if it's a range when the last thing - was a character: if this is a hyphen not at the - beginning or the end of a list, then it's the range - operator. */ - if (c == '-' - && !(p - 2 >= pattern && p[-2] == '[') - && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') - && *p != ']') - { - reg_errcode_t ret - = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; - } - - else if (p[0] == '-' && p[1] != ']') - { /* This handles ranges made up of characters only. */ - reg_errcode_t ret; - - /* Move past the `-'. */ - PATFETCH (c1); - - ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; - } - - /* See if we're at the beginning of a possible character - class. */ - - else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') - { /* Leave room for the null. */ - char str[CHAR_CLASS_MAX_LENGTH + 1]; - - PATFETCH (c); - c1 = 0; - - /* If pattern is `[[:'. */ - if (p == pend) return REG_EBRACK; - - for (;;) - { - PATFETCH (c); - if (c == ':' || c == ']' || p == pend - || c1 == CHAR_CLASS_MAX_LENGTH) - break; - str[c1++] = c; - } - str[c1] = '\0'; - - /* If isn't a word bracketed by `[:' and:`]': - undo the ending character, the letters, and leave - the leading `:' and `[' (but set bits for them). 
*/ - if (c == ':' && *p == ']') - { - int ch; - boolean is_alnum = STREQ (str, "alnum"); - boolean is_alpha = STREQ (str, "alpha"); - boolean is_blank = STREQ (str, "blank"); - boolean is_cntrl = STREQ (str, "cntrl"); - boolean is_digit = STREQ (str, "digit"); - boolean is_graph = STREQ (str, "graph"); - boolean is_lower = STREQ (str, "lower"); - boolean is_print = STREQ (str, "print"); - boolean is_punct = STREQ (str, "punct"); - boolean is_space = STREQ (str, "space"); - boolean is_upper = STREQ (str, "upper"); - boolean is_xdigit = STREQ (str, "xdigit"); - - if (!IS_CHAR_CLASS (str)) return REG_ECTYPE; - - /* Throw away the ] at the end of the character - class. */ - PATFETCH (c); - - if (p == pend) return REG_EBRACK; - - for (ch = 0; ch < 1 << BYTEWIDTH; ch++) - { - if ( (is_alnum && ISALNUM (ch)) - || (is_alpha && ISALPHA (ch)) - || (is_blank && ISBLANK (ch)) - || (is_cntrl && ISCNTRL (ch)) - || (is_digit && ISDIGIT (ch)) - || (is_graph && ISGRAPH (ch)) - || (is_lower && ISLOWER (ch)) - || (is_print && ISPRINT (ch)) - || (is_punct && ISPUNCT (ch)) - || (is_space && ISSPACE (ch)) - || (is_upper && ISUPPER (ch)) - || (is_xdigit && ISXDIGIT (ch))) - SET_LIST_BIT (ch); - } - had_char_class = true; - } - else - { - c1++; - while (c1--) - PATUNFETCH; - SET_LIST_BIT ('['); - SET_LIST_BIT (':'); - had_char_class = false; - } - } - else - { - had_char_class = false; - SET_LIST_BIT (c); - } - } - - /* Discard any (non)matching list bytes that are all 0 at the - end of the map. Decrease the map-length byte too. 
*/ - while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) - b[-1]--; - b += b[-1]; - } - break; - - - case '(': - if (syntax & RE_NO_BK_PARENS) - goto handle_open; - else - goto normal_char; - - - case ')': - if (syntax & RE_NO_BK_PARENS) - goto handle_close; - else - goto normal_char; - - - case '\n': - if (syntax & RE_NEWLINE_ALT) - goto handle_alt; - else - goto normal_char; - - - case '|': - if (syntax & RE_NO_BK_VBAR) - goto handle_alt; - else - goto normal_char; - - - case '{': - if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) - goto handle_interval; - else - goto normal_char; - - - case '\\': - if (p == pend) return REG_EESCAPE; - - /* Do not translate the character after the \, so that we can - distinguish, e.g., \B from \b, even if we normally would - translate, e.g., B to b. */ - PATFETCH_RAW (c); - - switch (c) - { - case '(': - if (syntax & RE_NO_BK_PARENS) - goto normal_backslash; - - handle_open: - bufp->re_nsub++; - regnum++; - - if (COMPILE_STACK_FULL) - { - RETALLOC (compile_stack.stack, compile_stack.size << 1, - compile_stack_elt_t); - if (compile_stack.stack == NULL) return REG_ESPACE; - - compile_stack.size <<= 1; - } - - /* These are the values to restore when we hit end of this - group. They are all relative offsets, so that if the - whole pattern moves because of realloc, they will still - be valid. */ - COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; - COMPILE_STACK_TOP.fixup_alt_jump - = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; - COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; - COMPILE_STACK_TOP.regnum = regnum; - - /* We will eventually replace the 0 with the number of - groups inner to this one. But do not push a - start_memory for groups beyond the last one we can - represent in the compiled pattern. 
*/ - if (regnum <= MAX_REGNUM) - { - COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; - BUF_PUSH_3 (start_memory, regnum, 0); - } - - compile_stack.avail++; - - fixup_alt_jump = 0; - laststart = 0; - begalt = b; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. */ - pending_exact = 0; - break; - - - case ')': - if (syntax & RE_NO_BK_PARENS) goto normal_backslash; - - if (COMPILE_STACK_EMPTY) - { - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_backslash; - else - return REG_ERPAREN; - } - - handle_close: - if (fixup_alt_jump) - { /* Push a dummy failure point at the end of the - alternative for a possible future - `pop_failure_jump' to pop. See comments at - `push_dummy_failure' in `re_match_2'. */ - BUF_PUSH (push_dummy_failure); - - /* We allocated space for this jump when we assigned - to `fixup_alt_jump', in the `handle_alt' case below. */ - STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); - } - - /* See similar code for backslashed left paren above. */ - if (COMPILE_STACK_EMPTY) - { - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - else - return REG_ERPAREN; - } - - /* Since we just checked for an empty stack above, this - ``can't happen''. */ - assert (compile_stack.avail != 0); - { - /* We don't just want to restore into `regnum', because - later groups should continue to be numbered higher, - as in `(ab)c(de)' -- the second group is #2. */ - regnum_t this_group_regnum; - - compile_stack.avail--; - begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; - fixup_alt_jump - = COMPILE_STACK_TOP.fixup_alt_jump - ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 - : 0; - laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; - this_group_regnum = COMPILE_STACK_TOP.regnum; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. 
*/ - pending_exact = 0; - - /* We're at the end of the group, so now we know how many - groups were inside this one. */ - if (this_group_regnum <= MAX_REGNUM) - { - unsigned char *inner_group_loc - = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; - - *inner_group_loc = regnum - this_group_regnum; - BUF_PUSH_3 (stop_memory, this_group_regnum, - regnum - this_group_regnum); - } - } - break; - - - case '|': /* `\|'. */ - if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) - goto normal_backslash; - handle_alt: - if (syntax & RE_LIMITED_OPS) - goto normal_char; - - /* Insert before the previous alternative a jump which - jumps to this alternative if the former fails. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (on_failure_jump, begalt, b + 6); - pending_exact = 0; - b += 3; - - /* The alternative before this one has a jump after it - which gets executed if it gets matched. Adjust that - jump so it will jump to this alternative's analogous - jump (put in below, which in turn will jump to the next - (if any) alternative's such jump, etc.). The last such - jump jumps to the correct final destination. A picture: - _____ _____ - | | | | - | v | v - a | b | c - - If we are at `b', then fixup_alt_jump right now points to a - three-byte space after `a'. We'll put in the jump, set - fixup_alt_jump to right after `b', and leave behind three - bytes which we'll fill in when we get to after `c'. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - /* Mark and leave space for a jump after this alternative, - to be filled in later either by next alternative or - when know we're at the end of a series of alternatives. */ - fixup_alt_jump = b; - GET_BUFFER_SPACE (3); - b += 3; - - laststart = 0; - begalt = b; - break; - - - case '{': - /* If \{ is a literal. */ - if (!(syntax & RE_INTERVALS) - /* If we're at `\{' and it's not the open-interval - operator. 
*/ - || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - || (p - 2 == pattern && p == pend)) - goto normal_backslash; - - handle_interval: - { - /* If got here, then the syntax allows intervals. */ - - /* At least (most) this many matches must be made. */ - int lower_bound = -1, upper_bound = -1; - - beg_interval = p - 1; - - if (p == pend) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - return REG_EBRACE; - } - - GET_UNSIGNED_NUMBER (lower_bound); - - if (c == ',') - { - GET_UNSIGNED_NUMBER (upper_bound); - if (upper_bound < 0) upper_bound = RE_DUP_MAX; - } - else - /* Interval such as `{1}' => match exactly once. */ - upper_bound = lower_bound; - - if (lower_bound < 0 || upper_bound > RE_DUP_MAX - || lower_bound > upper_bound) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - return REG_BADBR; - } - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (c != '\\') return REG_EBRACE; - - PATFETCH (c); - } - - if (c != '}') - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - return REG_BADBR; - } - - /* We just parsed a valid interval. */ - - /* If it's invalid to have no preceding re. */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; - else if (syntax & RE_CONTEXT_INDEP_OPS) - laststart = b; - else - goto unfetch_interval; - } - - /* If the upper bound is zero, don't want to succeed at - all; jump from `laststart' to `b + 3', which will be - the end of the buffer after we insert the jump. */ - if (upper_bound == 0) - { - GET_BUFFER_SPACE (3); - INSERT_JUMP (jump, laststart, b + 3); - b += 3; - } - - /* Otherwise, we have a nontrivial interval. When - we're all done, the pattern will look like: - set_number_at <jump count> <upper bound> - set_number_at <succeed_n count> <lower bound> - succeed_n <after jump addr> <succeed_n count> - <body of loop> - jump_n <succeed_n addr> <jump count> - (The upper bound and `jump_n' are omitted if - `upper_bound' is 1, though.) 
*/ - else - { /* If the upper bound is > 1, we need to insert - more at the end of the loop. */ - unsigned nbytes = 10 + (upper_bound > 1) * 10; - - GET_BUFFER_SPACE (nbytes); - - /* Initialize lower bound of the `succeed_n', even - though it will be set during matching by its - attendant `set_number_at' (inserted next), - because `re_compile_fastmap' needs to know. - Jump to the `jump_n' we might insert below. */ - INSERT_JUMP2 (succeed_n, laststart, - b + 5 + (upper_bound > 1) * 5, - lower_bound); - b += 5; - - /* Code to initialize the lower bound. Insert - before the `succeed_n'. The `5' is the last two - bytes of this `set_number_at', plus 3 bytes of - the following `succeed_n'. */ - insert_op2 (set_number_at, laststart, 5, lower_bound, b); - b += 5; - - if (upper_bound > 1) - { /* More than one repetition is allowed, so - append a backward jump to the `succeed_n' - that starts this interval. - - When we've reached this during matching, - we'll have matched the interval once, so - jump back only `upper_bound - 1' times. */ - STORE_JUMP2 (jump_n, b, laststart + 5, - upper_bound - 1); - b += 5; - - /* The location we want to set is the second - parameter of the `jump_n'; that is `b-2' as - an absolute address. `laststart' will be - the `set_number_at' we're about to insert; - `laststart+3' the number to set, the source - for the relative address. But we are - inserting into the middle of the pattern -- - so everything is getting moved up by 5. - Conclusion: (b - 2) - (laststart + 3) + 5, - i.e., b - laststart. - - We insert this at the beginning of the loop - so that if we fail during matching, we'll - reinitialize the bounds. */ - insert_op2 (set_number_at, laststart, b - laststart, - upper_bound - 1, b); - b += 5; - } - } - pending_exact = 0; - beg_interval = NULL; - } - break; - - unfetch_interval: - /* If an invalid interval, match the characters as literals. 
*/ - assert (beg_interval); - p = beg_interval; - beg_interval = NULL; - - /* normal_char and normal_backslash need `c'. */ - PATFETCH (c); - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (p > pattern && p[-1] == '\\') - goto normal_backslash; - } - goto normal_char; - -#ifdef emacs - /* There is no way to specify the before_dot and after_dot - operators. rms says this is ok. --karl */ - case '=': - BUF_PUSH (at_dot); - break; - - case 's': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); - break; - - case 'S': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); - break; -#endif /* emacs */ - - - case 'w': - laststart = b; - BUF_PUSH (wordchar); - break; - - - case 'W': - laststart = b; - BUF_PUSH (notwordchar); - break; - - - case '<': - BUF_PUSH (wordbeg); - break; - - case '>': - BUF_PUSH (wordend); - break; - - case 'b': - BUF_PUSH (wordbound); - break; - - case 'B': - BUF_PUSH (notwordbound); - break; - - case '`': - BUF_PUSH (begbuf); - break; - - case '\'': - BUF_PUSH (endbuf); - break; - - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - if (syntax & RE_NO_BK_REFS) - goto normal_char; - - c1 = c - '0'; - - if (c1 > regnum) - return REG_ESUBREG; - - /* Can't back reference to a subexpression if inside of it. */ - if (group_in_compile_stack (compile_stack, c1)) - goto normal_char; - - laststart = b; - BUF_PUSH_2 (duplicate, c1); - break; - - - case '+': - case '?': - if (syntax & RE_BK_PLUS_QM) - goto handle_plus; - else - goto normal_backslash; - - default: - normal_backslash: - /* You might think it would be useful for \ to mean - not to translate; but if we don't translate it - it will never match anything. */ - c = TRANSLATE (c); - goto normal_char; - } - break; - - - default: - /* Expects the character in `c'. */ - normal_char: - /* If no exactn currently being built. */ - if (!pending_exact - - /* If last exactn not at current position. 
*/ - || pending_exact + *pending_exact + 1 != b - - /* We have only one byte following the exactn for the count. */ - || *pending_exact == (1 << BYTEWIDTH) - 1 - - /* If followed by a repetition operator. */ - || *p == '*' || *p == '^' - || ((syntax & RE_BK_PLUS_QM) - ? *p == '\\' && (p[1] == '+' || p[1] == '?') - : (*p == '+' || *p == '?')) - || ((syntax & RE_INTERVALS) - && ((syntax & RE_NO_BK_BRACES) - ? *p == '{' - : (p[0] == '\\' && p[1] == '{')))) - { - /* Start building a new exactn. */ - - laststart = b; - - BUF_PUSH_2 (exactn, 0); - pending_exact = b - 1; - } - - BUF_PUSH (c); - (*pending_exact)++; - break; - } /* switch (c) */ - } /* while p != pend */ - - - /* Through the pattern now. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - if (!COMPILE_STACK_EMPTY) - return REG_EPAREN; - - free (compile_stack.stack); - - /* We have succeeded; set the length of the buffer. */ - bufp->used = b - bufp->buffer; - -#ifdef DEBUG - if (debug) - { - DEBUG_PRINT1 ("\nCompiled pattern: "); - print_compiled_pattern (bufp); - } -#endif /* DEBUG */ - - return REG_NOERROR; -} /* regex_compile */ - -/* Subroutines for `regex_compile'. */ - -/* Store OP at LOC followed by two-byte integer parameter ARG. */ - -static void -store_op1 (op, loc, arg) - re_opcode_t op; - unsigned char *loc; - int arg; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg); -} - - -/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -store_op2 (op, loc, arg1, arg2) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg1); - STORE_NUMBER (loc + 3, arg2); -} - - -/* Copy the bytes from LOC to END to open up three bytes of space at LOC - for OP followed by two-byte integer parameter ARG. 
*/ - -static void -insert_op1 (op, loc, arg, end) - re_opcode_t op; - unsigned char *loc; - int arg; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 3; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op1 (op, loc, arg); -} - - -/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -insert_op2 (op, loc, arg1, arg2, end) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 5; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op2 (op, loc, arg1, arg2); -} - - -/* P points to just after a ^ in PATTERN. Return true if that ^ comes - after an alternative or a begin-subexpression. We assume there is at - least one character before the ^. */ - -static boolean -at_begline_loc_p (pattern, p, syntax) - const char *pattern, *p; - reg_syntax_t syntax; -{ - const char *prev = p - 2; - boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; - - return - /* After a subexpression? */ - (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) - /* After an alternative? */ - || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); -} - - -/* The dual of at_begline_loc_p. This one is for $. We assume there is - at least one character after the $, i.e., `P < PEND'. */ - -static boolean -at_endline_loc_p (p, pend, syntax) - const char *p, *pend; - int syntax; -{ - const char *next = p; - boolean next_backslash = *next == '\\'; - const char *next_next = p + 1 < pend ? p + 1 : NULL; - - return - /* Before a subexpression? */ - (syntax & RE_NO_BK_PARENS ? *next == ')' - : next_backslash && next_next && *next_next == ')') - /* Before an alternative? */ - || (syntax & RE_NO_BK_VBAR ? *next == '|' - : next_backslash && next_next && *next_next == '|'); -} - - -/* Returns true if REGNUM is in one of COMPILE_STACK's elements and - false if it's not. 
*/ - -static boolean -group_in_compile_stack (compile_stack, regnum) - compile_stack_type compile_stack; - regnum_t regnum; -{ - int this_element; - - for (this_element = compile_stack.avail - 1; - this_element >= 0; - this_element--) - if (compile_stack.stack[this_element].regnum == regnum) - return true; - - return false; -} - - -/* Read the ending character of a range (in a bracket expression) from the - uncompiled pattern *P_PTR (which ends at PEND). We assume the - starting character is in `P[-2]'. (`P[-1]' is the character `-'.) - Then we set the translation of all bits between the starting and - ending characters (inclusive) in the compiled pattern B. - - Return an error code. - - We use these short variable names so we can use the same macros as - `regex_compile' itself. */ - -static reg_errcode_t -compile_range (p_ptr, pend, translate, syntax, b) - const char **p_ptr, *pend; - char *translate; - reg_syntax_t syntax; - unsigned char *b; -{ - unsigned this_char; - - const char *p = *p_ptr; - int range_start, range_end; - - if (p == pend) - return REG_ERANGE; - - /* Even though the pattern is a signed `char *', we need to fetch - with unsigned char *'s; if the high bit of the pattern character - is set, the range endpoints will be negative if we fetch using a - signed char *. - - We also want to fetch the endpoints without translating them; the - appropriate translation is done in the bit-setting loop below. */ - range_start = ((unsigned char *) p)[-2]; - range_end = ((unsigned char *) p)[0]; - - /* Have to increment the pointer into the pattern string, so the - caller isn't still at the ending character. */ - (*p_ptr)++; - - /* If the start is after the end, the range is empty. */ - if (range_start > range_end) - return syntax & RE_NO_EMPTY_RANGES ? 
REG_ERANGE : REG_NOERROR; - - /* Here we see why `this_char' has to be larger than an `unsigned - char' -- the range is inclusive, so if `range_end' == 0xff - (assuming 8-bit characters), we would otherwise go into an infinite - loop, since all characters <= 0xff. */ - for (this_char = range_start; this_char <= range_end; this_char++) - { - SET_LIST_BIT (TRANSLATE (this_char)); - } - - return REG_NOERROR; -} - -/* Failure stack declarations and macros; both re_compile_fastmap and - re_match_2 use a failure stack. These have to be macros because of - REGEX_ALLOCATE. */ - - -/* Number of failure points for which to initially allocate space - when matching. If this number is exceeded, we allocate more - space, so it is not a hard limit. */ -#ifndef INIT_FAILURE_ALLOC -#define INIT_FAILURE_ALLOC 5 -#endif - -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_SPACE each time we failed. - This is a variable only so users of regex can assign to it; we never - change it ourselves. */ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* Make sure noone compiles this code with a C++ compiler. */ +#ifdef __cplusplus +# error "This is C code, use a C compiler" +#endif + +#ifdef _LIBC +/* We have to keep the namespace clean. 
*/ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# include "../locale/localeinfo.h" +#endif + +#if defined (_MSC_VER) +#include <stdio.h> /* for size_t */ +#endif + +/* On some systems, limits.h sets RE_DUP_MAX to a lower value than + GNU regex allows. Include it before <regex.h>, which correctly + #undefs RE_DUP_MAX and sets it to the right value. */ +#include <limits.h> + +#ifdef GAWK +#undef alloca +#define alloca alloca_is_bad_you_should_never_use_it +#endif +#include <regex.h> +#include "regex_internal.h" + +#include "regex_internal.c" +#ifdef GAWK +#define bool int +#define true (1) +#define false (0) +#endif +#include "regcomp.c" +#include "regexec.c" + +/* Binary backward compatibility. 
*/ +#if _LIBC +# include <shlib-compat.h> +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) +link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") int re_max_failures = 2000; - -typedef const unsigned char *fail_stack_elt_t; - -typedef struct -{ - fail_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} fail_stack_type; - -#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) -#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) -#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) -#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) - - -/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */ - -#define INIT_FAIL_STACK() \ - do { \ - fail_stack.stack = (fail_stack_elt_t *) \ - REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ - \ - if (fail_stack.stack == NULL) \ - return -2; \ - \ - fail_stack.size = INIT_FAILURE_ALLOC; \ - fail_stack.avail = 0; \ - } while (0) - - -/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. - - Return 1 if succeeds, and 0 if either ran out of memory - allocating space for it or it was already too large. - - REGEX_REALLOCATE requires `destination' be declared. */ - -#define DOUBLE_FAIL_STACK(fail_stack) \ - ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ - ? 0 \ - : ((fail_stack).stack = (fail_stack_elt_t *) \ - REGEX_REALLOCATE ((fail_stack).stack, \ - (fail_stack).size * sizeof (fail_stack_elt_t), \ - ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ - \ - (fail_stack).stack == NULL \ - ? 0 \ - : ((fail_stack).size <<= 1, \ - 1))) - - -/* Push PATTERN_OP on FAIL_STACK. - - Return 1 if was able to do so and 0 if ran out of memory allocating - space to do so. */ -#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ - ((FAIL_STACK_FULL () \ - && !DOUBLE_FAIL_STACK (fail_stack)) \ - ? 
0 \ - : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ - 1)) - -/* This pushes an item onto the failure stack. Must be a four-byte - value. Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_ITEM(item) \ - fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item - -/* The complement operation. Assumes `fail_stack' is nonempty. */ -#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail] - -/* Used to omit pushing failure point id's when we're not debugging. */ -#ifdef DEBUG -#define DEBUG_PUSH PUSH_FAILURE_ITEM -#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM () -#else -#define DEBUG_PUSH(item) -#define DEBUG_POP(item_addr) -#endif - - -/* Push the information about the state we will need - if we ever fail back to it. - - Requires variables fail_stack, regstart, regend, reg_info, and - num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be - declared. - - Does `return FAILURE_CODE' if runs out of memory. */ - -#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ - do { \ - char *destination; \ - /* Must be int, so when we don't save any registers, the arithmetic \ - of 0 + -1 isn't done as unsigned. */ \ - int this_reg; \ - \ - DEBUG_STATEMENT (failure_id++); \ - DEBUG_STATEMENT (nfailure_points_pushed++); \ - DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ - DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ - DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ - \ - DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ - DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ - \ - /* Ensure we have enough space allocated for what we will push. 
*/ \ - while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ - { \ - if (!DOUBLE_FAIL_STACK (fail_stack)) \ - return failure_code; \ - \ - DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ - (fail_stack).size); \ - DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ - } \ - \ - /* Push the info, starting with the registers. */ \ - DEBUG_PRINT1 ("\n"); \ - \ - for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ - this_reg++) \ - { \ - DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ - DEBUG_STATEMENT (num_regs_pushed++); \ - \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - PUSH_FAILURE_ITEM (regstart[this_reg]); \ - \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - PUSH_FAILURE_ITEM (regend[this_reg]); \ - \ - DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ - DEBUG_PRINT2 (" match_null=%d", \ - REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ - DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ - DEBUG_PRINT2 (" matched_something=%d", \ - MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT2 (" ever_matched=%d", \ - EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT1 ("\n"); \ - PUSH_FAILURE_ITEM (reg_info[this_reg].word); \ - } \ - \ - DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ - PUSH_FAILURE_ITEM (lowest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ - PUSH_FAILURE_ITEM (highest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ - PUSH_FAILURE_ITEM (pattern_place); \ - \ - DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ - DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ - size2); \ - DEBUG_PRINT1 ("'\n"); \ - PUSH_FAILURE_ITEM (string_place); \ - \ - DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ - DEBUG_PUSH (failure_id); \ - } while (0) - -/* This is the number of items that are 
pushed and popped on the stack - for each register. */ -#define NUM_REG_ITEMS 3 - -/* Individual items aside from the registers. */ -#ifdef DEBUG -#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ -#else -#define NUM_NONREG_ITEMS 4 -#endif - -/* We push at most this many items on the stack. */ -#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS) - -/* We actually push this many items. */ -#define NUM_FAILURE_ITEMS \ - ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ - + NUM_NONREG_ITEMS) - -/* How many items can still be added to the stack without overflowing it. */ -#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) - - -/* Pops what PUSH_FAIL_STACK pushes. - - We restore into the parameters, all of which should be lvalues: - STR -- the saved data position. - PAT -- the saved pattern position. - LOW_REG, HIGH_REG -- the highest and lowest active registers. - REGSTART, REGEND -- arrays of string positions. - REG_INFO -- array of information about each subexpression. - - Also assumes the variables `fail_stack' and (if debugging), `bufp', - `pend', `string1', `size1', `string2', and `size2'. */ - -#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ -{ \ - DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ - int this_reg; \ - const unsigned char *string_temp; \ - \ - assert (!FAIL_STACK_EMPTY ()); \ - \ - /* Remove failure points and point to how many regs pushed. 
*/ \ - DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ - DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ - \ - assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ - \ - DEBUG_POP (&failure_id); \ - DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ - \ - /* If the saved string location is NULL, it came from an \ - on_failure_keep_string_jump opcode, and we want to throw away the \ - saved NULL, thus retaining our current position in the string. */ \ - string_temp = POP_FAILURE_ITEM (); \ - if (string_temp != NULL) \ - str = (const char *) string_temp; \ - \ - DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ - DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ - DEBUG_PRINT1 ("'\n"); \ - \ - pat = (unsigned char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ - \ - /* Restore register info. */ \ - high_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ - \ - low_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ - \ - for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ - { \ - DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ - \ - reg_info[this_reg].word = POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ - \ - regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - \ - regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - } \ - \ - DEBUG_STATEMENT (nfailure_points_popped++); \ -} /* POP_FAILURE_POINT */ - -/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in - BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible - characters can start a string that matches the pattern. 
This fastmap - is used by re_search to skip quickly over impossible starting points. - - The caller must supply the address of a (1 << BYTEWIDTH)-byte data - area as BUFP->fastmap. - - We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in - the pattern buffer. - - Returns 0 if we succeed, -2 if an internal error. */ - -int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; -{ - int j, k; - fail_stack_type fail_stack; -#ifndef REGEX_MALLOC - char *destination; -#endif - /* We don't push any register information onto the failure stack. */ - unsigned num_regs = 0; - - register char *fastmap = bufp->fastmap; - unsigned char *pattern = bufp->buffer; - unsigned long size = bufp->used; - const unsigned char *p = pattern; - register unsigned char *pend = pattern + size; - - /* Assume that each path through the pattern can be null until - proven otherwise. We set this false at the bottom of switch - statement, to which we get only if a particular path doesn't - match the empty string. */ - boolean path_can_be_null = true; - - /* We aren't doing a `succeed_n' to begin with. */ - boolean succeed_n_p = false; - - assert (fastmap != NULL && p != NULL); - - INIT_FAIL_STACK (); - bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ - bufp->fastmap_accurate = 1; /* It will be when we're done. */ - bufp->can_be_null = 0; - - while (p != pend || !FAIL_STACK_EMPTY ()) - { - if (p == pend) - { - bufp->can_be_null |= path_can_be_null; - - /* Reset for next path. */ - path_can_be_null = true; - - p = fail_stack.stack[--fail_stack.avail]; - } - - /* We should never be about to go beyond the end of the pattern. */ - assert (p < pend); - -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - - /* I guess the idea here is to simply not bother with a fastmap - if a backreference is used, since it's too hard to figure out - the fastmap for the corresponding group. 
Setting - `can_be_null' stops `re_search_2' from using the fastmap, so - that is all we do. */ - case duplicate: - bufp->can_be_null = 1; - return 0; - - - /* Following are the cases which match a character. These end - with `break'. */ - - case exactn: - fastmap[p[1]] = 1; - break; - - - case charset: - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) - fastmap[j] = 1; - break; - - - case charset_not: - /* Chars beyond end of map must be allowed. */ - for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) - fastmap[j] = 1; - break; - - - case wordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == Sword) - fastmap[j] = 1; - break; - - - case notwordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != Sword) - fastmap[j] = 1; - break; - - - case anychar: - /* `.' matches anything ... */ - for (j = 0; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - /* ... except perhaps newline. */ - if (!(bufp->syntax & RE_DOT_NEWLINE)) - fastmap['\n'] = 0; - - /* Return if we have already set `can_be_null'; if we have, - then the fastmap is irrelevant. Something's wrong here. */ - else if (bufp->can_be_null) - return 0; - - /* Otherwise, have to check alternative paths. */ - break; - - -#ifdef emacs - case syntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - case notsyntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - /* All cases after this match the empty string. These end with - `continue'. 
*/ - - - case before_dot: - case at_dot: - case after_dot: - continue; -#endif /* not emacs */ - - - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbound: - case notwordbound: - case wordbeg: - case wordend: - case push_dummy_failure: - continue; - - - case jump_n: - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case jump_past_alt: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - if (j > 0) - continue; - - /* Jump backward implies we just went through the body of a - loop and matched nothing. Opcode jumped to should be - `on_failure_jump' or `succeed_n'. Just treat it like an - ordinary jump. For a * loop, it has pushed its failure - point already; if so, discard that as redundant. */ - if ((re_opcode_t) *p != on_failure_jump - && (re_opcode_t) *p != succeed_n) - continue; - - p++; - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - - /* If what's on the stack is where we are now, pop it. */ - if (!FAIL_STACK_EMPTY () - && fail_stack.stack[fail_stack.avail - 1] == p) - fail_stack.avail--; - - continue; - - - case on_failure_jump: - case on_failure_keep_string_jump: - handle_on_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - - /* For some patterns, e.g., `(a?)?', `p+j' here points to the - end of the pattern. We don't want to push such a point, - since when we restore it above, entering the switch will - increment `p' past the end of the pattern. We don't need - to push such a point since we obviously won't find any more - fastmap entries beyond `pend'. Such a pattern can match - the null string, though. */ - if (p + j < pend) - { - if (!PUSH_PATTERN_OP (p + j, fail_stack)) - return -2; - } - else - bufp->can_be_null = 1; - - if (succeed_n_p) - { - EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ - succeed_n_p = false; - } - - continue; - - - case succeed_n: - /* Get to the number of times to succeed. */ - p += 2; - - /* Increment p past the n for when k != 0. 
*/ - EXTRACT_NUMBER_AND_INCR (k, p); - if (k == 0) - { - p -= 4; - succeed_n_p = true; /* Spaghetti code alert. */ - goto handle_on_failure_jump; - } - continue; - - - case set_number_at: - p += 4; - continue; - - - case start_memory: - case stop_memory: - p += 2; - continue; - - - default: - abort (); /* We have listed all the cases. */ - } /* switch *p++ */ - - /* Getting here means we have found the possible starting - characters for one path of the pattern -- and that the empty - string does not match. We need not follow this path further. - Instead, look at the next alternative (remembered on the - stack), or quit if no more. The test at the top of the loop - does these things. */ - path_can_be_null = false; - p = pend; - } /* while p */ - - /* Set `can_be_null' for the last path (also the first path, if the - pattern is empty). */ - bufp->can_be_null |= path_can_be_null; - return 0; -} /* re_compile_fastmap */ - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use - this memory for recording register information. STARTS and ENDS - must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ - -void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - unsigned num_regs; - regoff_t *starts, *ends; -{ - if (num_regs) - { - bufp->regs_allocated = REGS_REALLOCATE; - regs->num_regs = num_regs; - regs->start = starts; - regs->end = ends; - } - else - { - bufp->regs_allocated = REGS_UNALLOCATED; - regs->num_regs = 0; - regs->start = regs->end = (regoff_t *) 0; - } -} - -/* Searching routines. 
*/ - -/* Like re_search_2, below, but only one string is specified, and - doesn't let you say where to stop matching. */ - -int -re_search (bufp, string, size, startpos, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, startpos, range; - struct re_registers *regs; -{ - return re_search_2 (bufp, NULL, 0, string, size, startpos, range, - regs, size); -} - - -/* Using the compiled pattern in BUFP->buffer, first tries to match the - virtual concatenation of STRING1 and STRING2, starting first at index - STARTPOS, then at STARTPOS + 1, and so on. - - STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. - - RANGE is how far to scan while trying to match. RANGE = 0 means try - only at STARTPOS; in general, the last start tried is STARTPOS + - RANGE. - - In REGS, return the indices of the virtual concatenation of STRING1 - and STRING2 that matched the entire BUFP->buffer and its contained - subexpressions. - - Do not consider matching one past the index STOP in the virtual - concatenation of STRING1 and STRING2. - - We return either the position in the strings at which the match was - found, -1 if no match, or -2 if error (such as failure - stack overflow). */ - -int -re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int startpos; - int range; - struct re_registers *regs; - int stop; -{ - int val; - register char *fastmap = bufp->fastmap; - register char *translate = bufp->translate; - int total_size = size1 + size2; - int endpos = startpos + range; - - /* Check for out-of-range STARTPOS. */ - if (startpos < 0 || startpos > total_size) - return -1; - - /* Fix up RANGE if it might eventually take us outside - the virtual concatenation of STRING1 and STRING2. 
*/ - if (endpos < -1) - range = -1 - startpos; - else if (endpos > total_size) - range = total_size - startpos; - - /* If the search isn't to be a backwards one, don't waste time in a - search for a pattern that must be anchored. */ - if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) - { - if (startpos > 0) - return -1; - else - range = 1; - } - - /* Update the fastmap now if not correct already. */ - if (fastmap && !bufp->fastmap_accurate) - if (re_compile_fastmap (bufp) == -2) - return -2; - - /* Loop through the string, looking for a place to start matching. */ - for (;;) - { - /* If a fastmap is supplied, skip quickly over characters that - cannot be the start of a match. If the pattern can match the - null string, however, we don't need to skip characters; we want - the first null string. */ - if (fastmap && startpos < total_size && !bufp->can_be_null) - { - if (range > 0) /* Searching forwards. */ - { - register const char *d; - register int lim = 0; - int irange = range; - - if (startpos < size1 && startpos + range >= size1) - lim = range - (size1 - startpos); - - d = (startpos >= size1 ? string2 - size1 : string1) + startpos; - - /* Written out as an if-else to avoid testing `translate' - inside the loop. */ - if (translate) - while (range > lim - && !fastmap[(unsigned char) - translate[(unsigned char) *d++]]) - range--; - else - while (range > lim && !fastmap[(unsigned char) *d++]) - range--; - - startpos += irange - range; - } - else /* Searching backwards. */ - { - register char c = (size1 == 0 || startpos >= size1 - ? string2[startpos - size1] - : string1[startpos]); - - if (!fastmap[(unsigned char) TRANSLATE (c)]) - goto advance; - } - } - - /* If can't match the null string, and that's all we have left, fail. 
*/ - if (range >= 0 && startpos == total_size && fastmap - && !bufp->can_be_null) - return -1; - - val = re_match_2 (bufp, string1, size1, string2, size2, - startpos, regs, stop); - if (val >= 0) - return startpos; - - if (val == -2) - return -2; - - advance: - if (!range) - break; - else if (range > 0) - { - range--; - startpos++; - } - else - { - range++; - startpos--; - } - } - return -1; -} /* re_search_2 */ - -/* Declarations and macros for re_match_2. */ - -static int bcmp_translate (); -static boolean alt_match_null_string_p (), - common_op_match_null_string_p (), - group_match_null_string_p (); - -/* Structure for per-register (a.k.a. per-group) information. - This must not be longer than one word, because we push this value - onto the failure stack. Other register information, such as the - starting and ending positions (which are addresses), and the list of - inner groups (which is a bits list) are maintained in separate - variables. - - We are making a (strictly speaking) nonportable assumption here: that - the compiler will pack our bit fields into something that fits into - the type of `word', i.e., is something that fits into one item on the - failure stack. */ -typedef union -{ - fail_stack_elt_t word; - struct - { - /* This field is one if this group can match the empty string, - zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ -#define MATCH_NULL_UNSET_VALUE 3 - unsigned match_null_string_p : 2; - unsigned is_active : 1; - unsigned matched_something : 1; - unsigned ever_matched_something : 1; - } bits; -} register_info_type; - -#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) -#define IS_ACTIVE(R) ((R).bits.is_active) -#define MATCHED_SOMETHING(R) ((R).bits.matched_something) -#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) - - -/* Call this when have matched a real character; it sets `matched' flags - for the subexpressions which we are currently inside. 
Also records - that those subexprs have matched. */ -#define SET_REGS_MATCHED() \ - do \ - { \ - unsigned r; \ - for (r = lowest_active_reg; r <= highest_active_reg; r++) \ - { \ - MATCHED_SOMETHING (reg_info[r]) \ - = EVER_MATCHED_SOMETHING (reg_info[r]) \ - = 1; \ - } \ - } \ - while (0) - - -/* This converts PTR, a pointer into one of the search strings `string1' - and `string2' into an offset from the beginning of that string. */ -#define POINTER_TO_OFFSET(ptr) \ - (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) - -/* Registers are set to a sentinel when they haven't yet matched. */ -#define REG_UNSET_VALUE ((char *) -1) -#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) - - -/* Macros for dealing with the split strings in re_match_2. */ - -#define MATCHING_IN_FIRST_STRING (dend == end_match_1) - -/* Call before fetching a character with *d. This switches over to - string2 if necessary. */ -#define PREFETCH() \ - while (d == dend) \ - { \ - /* End of string2 => fail. */ \ - if (dend == end_match_2) \ - goto fail; \ - /* End of string1 => advance to string2. */ \ - d = string2; \ - dend = end_match_2; \ - } - - -/* Test if at very beginning or at very end of the virtual concatenation - of `string1' and `string2'. If only one string, it's `string2'. */ -#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) -#define AT_STRINGS_END(d) ((d) == end2) - - -/* Test if D points to a character which is word-constituent. We have - two special cases to check for: if past the end of string1, look at - the first character in string2; and if before the beginning of - string2, look at the last character in string1. */ -#define WORDCHAR_P(d) \ - (SYNTAX ((d) == end1 ? *string2 \ - : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ - == Sword) - -/* Test if the character before D and the one at D differ with respect - to being word-constituent. 
*/ -#define AT_WORD_BOUNDARY(d) \ - (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ - || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) - - -/* Free everything we malloc. */ -#ifdef REGEX_MALLOC -#define FREE_VAR(var) if (var) free (var); var = NULL -#define FREE_VARIABLES() \ - do { \ - FREE_VAR (fail_stack.stack); \ - FREE_VAR (regstart); \ - FREE_VAR (regend); \ - FREE_VAR (old_regstart); \ - FREE_VAR (old_regend); \ - FREE_VAR (best_regstart); \ - FREE_VAR (best_regend); \ - FREE_VAR (reg_info); \ - FREE_VAR (reg_dummy); \ - FREE_VAR (reg_info_dummy); \ - } while (0) -#else /* not REGEX_MALLOC */ -/* Some MIPS systems (at least) want this to free alloca'd storage. */ -#define FREE_VARIABLES() alloca (0) -#endif /* not REGEX_MALLOC */ - - -/* These values must meet several constraints. They must not be valid - register values; since we have a limit of 255 registers (because - we use only one byte in the pattern for the register number), we can - use numbers larger than 255. They must differ by 1, because of - NUM_FAILURE_ITEMS above. And the value for the lowest register must - be larger than the value for the highest register, so we do not try - to actually save any registers when none are active. */ -#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) -#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) - -/* Matching routines. */ - -#ifndef emacs /* Emacs never uses this. */ -/* re_match is like re_match_2 except it takes only a single string. */ - -int -re_match (bufp, string, size, pos, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, pos; - struct re_registers *regs; - { - return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); -} -#endif /* not emacs */ - - -/* re_match_2 matches the compiled pattern in BUFP against the - the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 - and SIZE2, respectively). We start matching at POS, and stop - matching at STOP. 
- - If REGS is non-null and the `no_sub' field of BUFP is nonzero, we - store offsets for the substring each group matched in REGS. See the - documentation for exactly how many groups we fill. - - We return -1 if no match, -2 if an internal error (such as the - failure stack overflowing). Otherwise, we return the length of the - matched substring. */ - -int -re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; -{ - /* General temporaries. */ - int mcnt; - unsigned char *p1; - - /* Just past the end of the corresponding string. */ - const char *end1, *end2; - - /* Pointers into string1 and string2, just past the last characters in - each to consider matching. */ - const char *end_match_1, *end_match_2; - - /* Where we are in the data, and the end of the current string. */ - const char *d, *dend; - - /* Where we are in the pattern, and the end of the pattern. */ - unsigned char *p = bufp->buffer; - register unsigned char *pend = p + bufp->used; - - /* We use this to map every character in the string. */ - char *translate = bufp->translate; - - /* Failure point stack. Each place that can handle a failure further - down the line pushes a failure point on this stack. It consists of - restart, regend, and reg_info for all registers corresponding to - the subexpressions we're currently inside, plus the number of such - registers, and, finally, two char *'s. The first char * is where - to resume scanning the pattern; the second one is where to resume - scanning the strings. If the latter is zero, the failure point is - a ``dummy''; if a failure happens and the failure point is a dummy, - it gets discarded and the next next one is tried. 
*/ - fail_stack_type fail_stack; -#ifdef DEBUG - static unsigned failure_id = 0; - unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; -#endif - - /* We fill all the registers internally, independent of what we - return, for use in backreferences. The number here includes - an element for register zero. */ - unsigned num_regs = bufp->re_nsub + 1; - - /* The currently active registers. */ - unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; - unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; - - /* Information on the contents of registers. These are pointers into - the input strings; they record just what was matched (on this - attempt) by a subexpression part of the pattern, that is, the - regnum-th regstart pointer points to where in the pattern we began - matching and the regnum-th regend points to right after where we - stopped matching the regnum-th subexpression. (The zeroth register - keeps track of what the whole pattern matches.) */ - const char **regstart = NULL, **regend = NULL; - - /* If a group that's operated upon by a repetition operator fails to - match anything, then the register for its start will need to be - restored because it will have been set to wherever in the string we - are when we last see its open-group operator. Similarly for a - register's end. */ - const char **old_regstart = NULL, **old_regend = NULL; - - /* The is_active field of reg_info helps us keep track of which (possibly - nested) subexpressions we are currently in. The matched_something - field of reg_info[reg_num] helps us tell whether or not we have - matched any of the pattern so far this time through the reg_num-th - subexpression. These two fields get reset each time through any - loop their register is in. */ - register_info_type *reg_info = NULL; - - /* The following record the register info as found in the above - variables when we find a match better than any we've seen before. 
- This happens as we backtrack through the failure points, which in - turn happens only if we have not yet matched the entire string. */ - unsigned best_regs_set = false; - const char **best_regstart = NULL, **best_regend = NULL; - - /* Logically, this is `best_regend[0]'. But we don't want to have to - allocate space for that if we're not allocating space for anything - else (see below). Also, we never need info about register 0 for - any of the other register vectors, and it seems rather a kludge to - treat `best_regend' differently than the rest. So we keep track of - the end of the best match so far in a separate variable. We - initialize this to NULL so that when we backtrack the first time - and need to test it, it's not garbage. */ - const char *match_end = NULL; - - /* Used when we pop values we don't care about. */ - const char **reg_dummy = NULL; - register_info_type *reg_info_dummy = NULL; - -#ifdef DEBUG - /* Counts the total number of registers pushed. */ - unsigned num_regs_pushed = 0; -#endif - - DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); - - INIT_FAIL_STACK (); - - /* Do not bother to initialize all the register variables if there are - no groups in the pattern, as it takes a fair amount of time. If - there are groups, we include space for register 0 (the whole - pattern), even though we never use it, since it simplifies the - array indexing. We should fix this. 
*/ - if (bufp->re_nsub) - { - regstart = REGEX_TALLOC (num_regs, const char *); - regend = REGEX_TALLOC (num_regs, const char *); - old_regstart = REGEX_TALLOC (num_regs, const char *); - old_regend = REGEX_TALLOC (num_regs, const char *); - best_regstart = REGEX_TALLOC (num_regs, const char *); - best_regend = REGEX_TALLOC (num_regs, const char *); - reg_info = REGEX_TALLOC (num_regs, register_info_type); - reg_dummy = REGEX_TALLOC (num_regs, const char *); - reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); - - if (!(regstart && regend && old_regstart && old_regend && reg_info - && best_regstart && best_regend && reg_dummy && reg_info_dummy)) - { - FREE_VARIABLES (); - return -2; - } - } -#ifdef REGEX_MALLOC - else - { - /* We must initialize all our variables to NULL, so that - `FREE_VARIABLES' doesn't try to free them. */ - regstart = regend = old_regstart = old_regend = best_regstart - = best_regend = reg_dummy = NULL; - reg_info = reg_info_dummy = (register_info_type *) NULL; - } -#endif /* REGEX_MALLOC */ - - /* The starting position is bogus. */ - if (pos < 0 || pos > size1 + size2) - { - FREE_VARIABLES (); - return -1; - } - - /* Initialize subexpression text positions to -1 to mark ones that no - start_memory/stop_memory has been seen for. Also initialize the - register information struct. */ - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = regend[mcnt] - = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; - - REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; - IS_ACTIVE (reg_info[mcnt]) = 0; - MATCHED_SOMETHING (reg_info[mcnt]) = 0; - EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; - } - - /* We move `string1' into `string2' if the latter's empty -- but not if - `string1' is null. */ - if (size2 == 0 && string1 != NULL) - { - string2 = string1; - size2 = size1; - string1 = 0; - size1 = 0; - } - end1 = string1 + size1; - end2 = string2 + size2; - - /* Compute where to stop matching, within the two strings. 
*/ - if (stop <= size1) - { - end_match_1 = string1 + stop; - end_match_2 = string2; - } - else - { - end_match_1 = end1; - end_match_2 = string2 + stop - size1; - } - - /* `p' scans through the pattern as `d' scans through the data. - `dend' is the end of the input string that `d' points within. `d' - is advanced into the following input string whenever necessary, but - this happens before fetching; therefore, at the beginning of the - loop, `d' can be pointing at the end of a string, but it cannot - equal `string2'. */ - if (size1 > 0 && pos <= size1) - { - d = string1 + pos; - dend = end_match_1; - } - else - { - d = string2 + pos - size1; - dend = end_match_2; - } - - DEBUG_PRINT1 ("The compiled pattern is: "); - DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); - DEBUG_PRINT1 ("The string to match is: `"); - DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); - DEBUG_PRINT1 ("'\n"); - - /* This loops over pattern commands. It exits by returning from the - function if the match is complete, or it drops through if the match - fails at this starting point in the input data. */ - for (;;) - { - DEBUG_PRINT2 ("\n0x%x: ", p); - - if (p == pend) - { /* End of pattern means we might have succeeded. */ - DEBUG_PRINT1 ("end of pattern ... "); - - /* If we haven't matched the entire string, and we want the - longest match, try backtracking. */ - if (d != end_match_2) - { - DEBUG_PRINT1 ("backtracking.\n"); - - if (!FAIL_STACK_EMPTY ()) - { /* More failure points to try. */ - boolean same_str_p = (FIRST_STRING_P (match_end) - == MATCHING_IN_FIRST_STRING); - - /* If exceeds best match so far, save it. 
*/ - if (!best_regs_set - || (same_str_p && d > match_end) - || (!same_str_p && !MATCHING_IN_FIRST_STRING)) - { - best_regs_set = true; - match_end = d; - - DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - best_regstart[mcnt] = regstart[mcnt]; - best_regend[mcnt] = regend[mcnt]; - } - } - goto fail; - } - - /* If no failure points, don't restore garbage. */ - else if (best_regs_set) - { - restore_best_regs: - /* Restore best match. It may happen that `dend == - end_match_1' while the restored d is in string2. - For example, the pattern `x.*y.*z' against the - strings `x-' and `y-z-', if the two strings are - not consecutive in memory. */ - DEBUG_PRINT1 ("Restoring best registers.\n"); - - d = match_end; - dend = ((d >= string1 && d <= end1) - ? end_match_1 : end_match_2); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = best_regstart[mcnt]; - regend[mcnt] = best_regend[mcnt]; - } - } - } /* d != end_match_2 */ - - DEBUG_PRINT1 ("Accepting match.\n"); - - /* If caller wants register contents data back, do it. */ - if (regs && !bufp->no_sub) - { - /* Have the register data arrays been allocated? */ - if (bufp->regs_allocated == REGS_UNALLOCATED) - { /* No. So allocate them with malloc. We need one - extra element beyond `num_regs' for the `-1' marker - GNU code uses. */ - regs->num_regs = MAX (RE_NREGS, num_regs + 1); - regs->start = TALLOC (regs->num_regs, regoff_t); - regs->end = TALLOC (regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - bufp->regs_allocated = REGS_REALLOCATE; - } - else if (bufp->regs_allocated == REGS_REALLOCATE) - { /* Yes. If we need more elements than were already - allocated, reallocate them. If we need fewer, just - leave it alone. 
*/ - if (regs->num_regs < num_regs + 1) - { - regs->num_regs = num_regs + 1; - RETALLOC (regs->start, regs->num_regs, regoff_t); - RETALLOC (regs->end, regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - } - } - else - assert (bufp->regs_allocated == REGS_FIXED); - - /* Convert the pointer data in `regstart' and `regend' to - indices. Register zero has to be set differently, - since we haven't kept track of any info for it. */ - if (regs->num_regs > 0) - { - regs->start[0] = pos; - regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 - : d - string2 + size1); - } - - /* Go through the first `min (num_regs, regs->num_regs)' - registers, since that is all we initialized. */ - for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) - { - if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) - regs->start[mcnt] = regs->end[mcnt] = -1; - else - { - regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); - regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); - } - } - - /* If the regs structure we return has more elements than - were in the pattern, set the extra elements to -1. If - we (re)allocated the registers, this is the case, - because we always allocate enough to have at least one - -1 at the end. */ - for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; - } /* regs && !bufp->no_sub */ - - FREE_VARIABLES (); - DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", - nfailure_points_pushed, nfailure_points_popped, - nfailure_points_pushed - nfailure_points_popped); - DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); - - mcnt = d - pos - (MATCHING_IN_FIRST_STRING - ? string1 - : string2 - size1); - - DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); - - return mcnt; - } - - /* Otherwise match next pattern command. */ -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - /* Ignore these. 
Used to ignore the n of succeed_n's which - currently have n == 0. */ - case no_op: - DEBUG_PRINT1 ("EXECUTING no_op.\n"); - break; - - - /* Match the next n pattern characters exactly. The following - byte in the pattern defines n, and the n bytes after that - are the characters to match. */ - case exactn: - mcnt = *p++; - DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); - - /* This is written out as an if-else so we don't waste time - testing `translate' inside the loop. */ - if (translate) - { - do - { - PREFETCH (); - if (translate[(unsigned char) *d++] != (char) *p++) - goto fail; - } - while (--mcnt); - } - else - { - do - { - PREFETCH (); - if (*d++ != (char) *p++) goto fail; - } - while (--mcnt); - } - SET_REGS_MATCHED (); - break; - - - /* Match any character except possibly a newline or a null. */ - case anychar: - DEBUG_PRINT1 ("EXECUTING anychar.\n"); - - PREFETCH (); - - if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') - || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) - goto fail; - - SET_REGS_MATCHED (); - DEBUG_PRINT2 (" Matched `%d'.\n", *d); - d++; - break; - - - case charset: - case charset_not: - { - register unsigned char c; - boolean not = (re_opcode_t) *(p - 1) == charset_not; - - DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); - - PREFETCH (); - c = TRANSLATE (*d); /* The character to match. */ - - /* Cast to `unsigned' instead of `unsigned char' in case the - bit list is a full 32 bytes long. */ - if (c < (unsigned) (*p * BYTEWIDTH) - && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - p += 1 + *p; - - if (!not) goto fail; - - SET_REGS_MATCHED (); - d++; - break; - } - - - /* The beginning of a group is represented by start_memory. - The arguments are the register number in the next byte, and the - number of groups inner to this one in the next. The text - matched within the group is recorded (in the internal - registers data structure) under the register number. 
*/ - case start_memory: - DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); - - /* Find out if this group can match the empty string. */ - p1 = p; /* To send to group_match_null_string_p. */ - - if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[*p]) - = group_match_null_string_p (&p1, pend, reg_info); - - /* Save the position in the string where we were the last time - we were at this open-group operator in case the group is - operated upon by a repetition operator, e.g., with `(a*)*b' - against `ab'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regstart[*p]) ? d : regstart[*p] - : regstart[*p]; - DEBUG_PRINT2 (" old_regstart: %d\n", - POINTER_TO_OFFSET (old_regstart[*p])); - - regstart[*p] = d; - DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); - - IS_ACTIVE (reg_info[*p]) = 1; - MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* This is the new highest active register. */ - highest_active_reg = *p; - - /* If nothing was active before, this is the new lowest active - register. */ - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *p; - - /* Move past the register number and inner group count. */ - p += 2; - break; - - - /* The stop_memory opcode represents the end of a group. Its - arguments are the same as start_memory's: the register - number, and the number of inner groups. */ - case stop_memory: - DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); - - /* We need to save the string position the last time we were at - this close-group operator in case the group is operated - upon by a repetition operator, e.g., with `((a*)*(b*)*)*' - against `aba'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regend[*p]) ? 
d : regend[*p] - : regend[*p]; - DEBUG_PRINT2 (" old_regend: %d\n", - POINTER_TO_OFFSET (old_regend[*p])); - - regend[*p] = d; - DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); - - /* This register isn't active anymore. */ - IS_ACTIVE (reg_info[*p]) = 0; - - /* If this was the only register active, nothing is active - anymore. */ - if (lowest_active_reg == highest_active_reg) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - { /* We must scan for the new highest active register, since - it isn't necessarily one less than now: consider - (a(b)c(d(e)f)g). When group 3 ends, after the f), the - new highest active register is 1. */ - unsigned char r = *p - 1; - while (r > 0 && !IS_ACTIVE (reg_info[r])) - r--; - - /* If we end up at register zero, that means that we saved - the registers as the result of an `on_failure_jump', not - a `start_memory', and we jumped to past the innermost - `stop_memory'. For example, in ((.)*) we save - registers 1 and 2 as a result of the *, but when we pop - back to the second ), we are at the stop_memory 1. - Thus, nothing is active. */ - if (r == 0) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - highest_active_reg = r; - } - - /* If just failed to match something this time around with a - group that's operated on by a repetition operator, try to - force exit from the ``loop'', and restore the register - information for this group that we had before trying this - last match. 
*/ - if ((!MATCHED_SOMETHING (reg_info[*p]) - || (re_opcode_t) p[-3] == start_memory) - && (p + 2) < pend) - { - boolean is_a_jump_n = false; - - p1 = p + 2; - mcnt = 0; - switch ((re_opcode_t) *p1++) - { - case jump_n: - is_a_jump_n = true; - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (is_a_jump_n) - p1 += 2; - break; - - default: - /* do nothing */ ; - } - p1 += mcnt; - - /* If the next operation is a jump backwards in the pattern - to an on_failure_jump right before the start_memory - corresponding to this stop_memory, exit from the loop - by forcing a failure after pushing on the stack the - on_failure_jump's jump in the pattern, and d. */ - if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump - && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) - { - /* If this group ever matched anything, then restore - what its registers were before trying this last - failed match, e.g., with `(a*)*b' against `ab' for - regstart[1], and, e.g., with `((a*)*(b*)*)*' - against `aba' for regend[3]. - - Also restore the registers for inner groups for, - e.g., `((a*)(b*))*' against `aba' (register 3 would - otherwise get trashed). */ - - if (EVER_MATCHED_SOMETHING (reg_info[*p])) - { - unsigned r; - - EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* Restore this and inner groups' (if any) registers. */ - for (r = *p; r < *p + *(p + 1); r++) - { - regstart[r] = old_regstart[r]; - - /* xx why this test? */ - if ((int) old_regend[r] >= (int) regstart[r]) - regend[r] = old_regend[r]; - } - } - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - PUSH_FAILURE_POINT (p1 + mcnt, d, -2); - - goto fail; - } - } - - /* Move past the register number and the inner group count. */ - p += 2; - break; - - - /* \<digit> has been turned into a `duplicate' command which is - followed by the numeric value of <digit> as the register number. 
*/ - case duplicate: - { - register const char *d2, *dend2; - int regno = *p++; /* Get which register to match against. */ - DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); - - /* Can't back reference a group which we've never matched. */ - if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) - goto fail; - - /* Where in input to try to start matching. */ - d2 = regstart[regno]; - - /* Where to stop matching; if both the place to start and - the place to stop matching are in the same string, then - set to the place to stop, otherwise, for now have to use - the end of the first string. */ - - dend2 = ((FIRST_STRING_P (regstart[regno]) - == FIRST_STRING_P (regend[regno])) - ? regend[regno] : end_match_1); - for (;;) - { - /* If necessary, advance to next segment in register - contents. */ - while (d2 == dend2) - { - if (dend2 == end_match_2) break; - if (dend2 == regend[regno]) break; - - /* End of string1 => advance to string2. */ - d2 = string2; - dend2 = regend[regno]; - } - /* At end of register contents => success */ - if (d2 == dend2) break; - - /* If necessary, advance to next segment in data. */ - PREFETCH (); - - /* How many characters left in this segment to match. */ - mcnt = dend - d; - - /* Want how many consecutive characters we can match in - one shot, so, if necessary, adjust the count. */ - if (mcnt > dend2 - d2) - mcnt = dend2 - d2; - - /* Compare that many; failure if mismatch, else move - past them. */ - if (translate - ? bcmp_translate (d, d2, mcnt, translate) - : bcmp (d, d2, mcnt)) - goto fail; - d += mcnt, d2 += mcnt; - } - } - break; - - - /* begline matches the empty string at the beginning of the string - (unless `not_bol' is set in `bufp'), and, if - `newline_anchor' is set, after newlines. */ - case begline: - DEBUG_PRINT1 ("EXECUTING begline.\n"); - - if (AT_STRINGS_BEG (d)) - { - if (!bufp->not_bol) break; - } - else if (d[-1] == '\n' && bufp->newline_anchor) - { - break; - } - /* In all other cases, we fail. 
*/ - goto fail; - - - /* endline is the dual of begline. */ - case endline: - DEBUG_PRINT1 ("EXECUTING endline.\n"); - - if (AT_STRINGS_END (d)) - { - if (!bufp->not_eol) break; - } - - /* We have to ``prefetch'' the next character. */ - else if ((d == end1 ? *string2 : *d) == '\n' - && bufp->newline_anchor) - { - break; - } - goto fail; - - - /* Match at the very beginning of the data. */ - case begbuf: - DEBUG_PRINT1 ("EXECUTING begbuf.\n"); - if (AT_STRINGS_BEG (d)) - break; - goto fail; - - - /* Match at the very end of the data. */ - case endbuf: - DEBUG_PRINT1 ("EXECUTING endbuf.\n"); - if (AT_STRINGS_END (d)) - break; - goto fail; - - - /* on_failure_keep_string_jump is used to optimize `.*\n'. It - pushes NULL as the value for the string on the stack. Then - `pop_failure_point' will keep the current value for the - string, instead of restoring it. To see why, consider - matching `foo\nbar' against `.*\n'. The .* matches the foo; - then the . fails against the \n. But the next thing we want - to do is match the \n against the \n; if we restored the - string value, we would be back at the foo. - - Because this is used only in specific cases, we don't need to - check all the things that `on_failure_jump' does, to make - sure the right things get saved on the stack. Hence we don't - share its code. The only reason to push anything on the - stack at all is that otherwise we would have to change - `anychar's code to do something besides goto fail in this - case; that seems worse than this. */ - case on_failure_keep_string_jump: - DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); - - PUSH_FAILURE_POINT (p + mcnt, NULL, -2); - break; - - - /* Uses of on_failure_jump: - - Each alternative starts with an on_failure_jump that points - to the beginning of the next alternative. 
Each alternative - except the last ends with a jump that in effect jumps past - the rest of the alternatives. (They really jump to the - ending jump of the following alternative, because tensioning - these jumps is a hassle.) - - Repeats start with an on_failure_jump that points past both - the repetition text and either the following jump or - pop_failure_jump back to this on_failure_jump. */ - case on_failure_jump: - on_failure: - DEBUG_PRINT1 ("EXECUTING on_failure_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); - - /* If this on_failure_jump comes right before a group (i.e., - the original * applied to a group), save the information - for that group and all inner ones, so that if we fail back - to this point, the group's information will be correct. - For example, in \(a*\)*\1, we need the preceding group, - and in \(\(a*\)b*\)\2, we need the inner group. */ - - /* We can't use `p' to check ahead because we push - a failure point to `p + mcnt' after we do this. */ - p1 = p; - - /* We need to skip no_op's before we look for the - start_memory in case this on_failure_jump is happening as - the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 - against aba. */ - while (p1 < pend && (re_opcode_t) *p1 == no_op) - p1++; - - if (p1 < pend && (re_opcode_t) *p1 == start_memory) - { - /* We have a new highest active register now. This will - get reset at the start_memory we are about to get to, - but we will have saved all the registers relevant to - this repetition op, as described above. */ - highest_active_reg = *(p1 + 1) + *(p1 + 2); - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *(p1 + 1); - } - - DEBUG_PRINT1 (":\n"); - PUSH_FAILURE_POINT (p + mcnt, d, -2); - break; - - - /* A smart repeat ends with `maybe_pop_jump'. - We change it to either `pop_failure_jump' or `jump'. 
*/ - case maybe_pop_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); - { - register unsigned char *p2 = p; - - /* Compare the beginning of the repeat with what in the - pattern follows its end. If we can establish that there - is nothing that they would both match, i.e., that we - would have to backtrack because of (as in, e.g., `a*a') - then we can change to pop_failure_jump, because we'll - never have to backtrack. - - This is not true in the case of alternatives: in - `(a|ab)*' we do need to backtrack to the `ab' alternative - (e.g., if the string was `ab'). But instead of trying to - detect that here, the alternative has put on a dummy - failure point which is what we will end up popping. */ - - /* Skip over open/close-group commands. */ - while (p2 + 2 < pend - && ((re_opcode_t) *p2 == stop_memory - || (re_opcode_t) *p2 == start_memory)) - p2 += 3; /* Skip over args, too. */ - - /* If we're at the end of the pattern, we can change. */ - if (p2 == pend) - { - /* Consider what happens when matching ":\(.*\)" - against ":/". I don't really understand this code - yet. */ - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 - (" End of pattern: change to `pop_failure_jump'.\n"); - } - - else if ((re_opcode_t) *p2 == exactn - || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) - { - register unsigned char c - = *p2 == (unsigned char) endline ? '\n' : p2[2]; - p1 = p + mcnt; - - /* p1[0] ... p1[2] are the `on_failure_jump' corresponding - to the `maybe_finalize_jump' of this case. Examine what - follows. 
*/ - if ((re_opcode_t) p1[3] == exactn && p1[5] != c) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", - c, p1[5]); - } - - else if ((re_opcode_t) p1[3] == charset - || (re_opcode_t) p1[3] == charset_not) - { - int not = (re_opcode_t) p1[3] == charset_not; - - if (c < (unsigned char) (p1[4] * BYTEWIDTH) - && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - /* `not' is equal to 1 if c would match, which means - that we can't change to pop_failure_jump. */ - if (!not) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - } - } - p -= 2; /* Point at relative address again. */ - if ((re_opcode_t) p[-1] != pop_failure_jump) - { - p[-1] = (unsigned char) jump; - DEBUG_PRINT1 (" Match => jump.\n"); - goto unconditional_jump; - } - /* Note fall through. */ - - - /* The end of a simple repeat has a pop_failure_jump back to - its matching on_failure_jump, where the latter will push a - failure point. The pop_failure_jump takes off failure - points put on by this pop_failure_jump's matching - on_failure_jump; we got through the pattern to here from the - matching on_failure_jump, so didn't fail. */ - case pop_failure_jump: - { - /* We need to pass separate storage for the lowest and - highest registers, even though we don't care about the - actual values. Otherwise, we will restore only one - register from the stack, since lowest will == highest in - `pop_failure_point'. */ - unsigned dummy_low_reg, dummy_high_reg; - unsigned char *pdummy; - const char *sdummy; - - DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); - POP_FAILURE_POINT (sdummy, pdummy, - dummy_low_reg, dummy_high_reg, - reg_dummy, reg_dummy, reg_info_dummy); - } - /* Note fall through. */ - - - /* Unconditionally jump (without popping any failure points). */ - case jump: - unconditional_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. 
*/ - DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); - p += mcnt; /* Do the jump. */ - DEBUG_PRINT2 ("(to 0x%x).\n", p); - break; - - - /* We need this opcode so we can detect where alternatives end - in `group_match_null_string_p' et al. */ - case jump_past_alt: - DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); - goto unconditional_jump; - - - /* Normally, the on_failure_jump pushes a failure point, which - then gets popped at pop_failure_jump. We will end up at - pop_failure_jump, also, and with a pattern of, say, `a+', we - are skipping over the on_failure_jump, so we have to push - something meaningless for pop_failure_jump to pop. */ - case dummy_failure_jump: - DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); - /* It doesn't matter what we push for the string here. What - the code at `fail' tests is the value for the pattern. */ - PUSH_FAILURE_POINT (0, 0, -2); - goto unconditional_jump; - - - /* At the end of an alternative, we need to push a dummy failure - point in case we are followed by a `pop_failure_jump', because - we don't want the failure point for the alternative to be - popped. For example, matching `(a|ab)*' against `aab' - requires that we match the `ab' alternative. */ - case push_dummy_failure: - DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); - /* See comments just above at `dummy_failure_jump' about the - two zeroes. */ - PUSH_FAILURE_POINT (0, 0, -2); - break; - - /* Have to succeed matching what follows at least n times. - After that, handle like `on_failure_jump'. */ - case succeed_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); - - assert (mcnt >= 0); - /* Originally, this is how many times we HAVE to succeed. 
*/ - if (mcnt > 0) - { - mcnt--; - p += 2; - STORE_NUMBER_AND_INCR (p, mcnt); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); - } - else if (mcnt == 0) - { - DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); - p[2] = (unsigned char) no_op; - p[3] = (unsigned char) no_op; - goto on_failure; - } - break; - - case jump_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); - - /* Originally, this is how many times we CAN jump. */ - if (mcnt) - { - mcnt--; - STORE_NUMBER (p + 2, mcnt); - goto unconditional_jump; - } - /* If don't have to jump any more, skip over the rest of command. */ - else - p += 4; - break; - - case set_number_at: - { - DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - p1 = p + mcnt; - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); - STORE_NUMBER (p1, mcnt); - break; - } - - case wordbound: - DEBUG_PRINT1 ("EXECUTING wordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - break; - goto fail; - - case notwordbound: - DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - goto fail; - break; - - case wordbeg: - DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); - if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) - break; - goto fail; - - case wordend: - DEBUG_PRINT1 ("EXECUTING wordend.\n"); - if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) - && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) - break; - goto fail; - -#ifdef emacs -#ifdef emacs19 - case before_dot: - DEBUG_PRINT1 ("EXECUTING before_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) >= point) - goto fail; - break; - - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) != point) - goto fail; - break; - - case after_dot: - DEBUG_PRINT1 ("EXECUTING after_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) <= point) - goto fail; - break; -#else /* not emacs19 */ - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if 
(PTR_CHAR_POS ((unsigned char *) d) + 1 != point) - goto fail; - break; -#endif /* not emacs19 */ - - case syntaxspec: - DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchsyntax; - - case wordchar: - DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); - mcnt = (int) Sword; - matchsyntax: - PREFETCH (); - if (SYNTAX (*d++) != (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - - case notsyntaxspec: - DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchnotsyntax; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); - mcnt = (int) Sword; - matchnotsyntax: - PREFETCH (); - if (SYNTAX (*d++) == (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - -#else /* not emacs */ - case wordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); - PREFETCH (); - if (!WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); - PREFETCH (); - if (WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; -#endif /* not emacs */ - - default: - abort (); - } - continue; /* Successfully executed one pattern command; keep going. */ - - - /* We goto here if a matching operation fails. */ - fail: - if (!FAIL_STACK_EMPTY ()) - { /* A restart point is known. Restore to that state. */ - DEBUG_PRINT1 ("\nFAIL:\n"); - POP_FAILURE_POINT (d, p, - lowest_active_reg, highest_active_reg, - regstart, regend, reg_info); - - /* If this failure point is a dummy, try the next one. */ - if (!p) - goto fail; - - /* If we failed to the end of the pattern, don't examine *p. */ - assert (p <= pend); - if (p < pend) - { - boolean is_a_jump_n = false; - - /* If failed to a backwards jump that's part of a repetition - loop, need to pop this failure point and use the next one. 
*/ - switch ((re_opcode_t) *p) - { - case jump_n: - is_a_jump_n = true; - case maybe_pop_jump: - case pop_failure_jump: - case jump: - p1 = p + 1; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - - if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) - || (!is_a_jump_n - && (re_opcode_t) *p1 == on_failure_jump)) - goto fail; - break; - default: - /* do nothing */ ; - } - } - - if (d >= string1 && d <= end1) - dend = end_match_1; - } - else - break; /* Matching at this starting point really fails. */ - } /* for (;;) */ - - if (best_regs_set) - goto restore_best_regs; - - FREE_VARIABLES (); - - return -1; /* Failure to match. */ -} /* re_match_2 */ - -/* Subroutine definitions for re_match_2. */ - - -/* We are passed P pointing to a register number after a start_memory. - - Return true if the pattern up to the corresponding stop_memory can - match the empty string, and false otherwise. - - If we find the matching stop_memory, sets P to point to one past its number. - Otherwise, sets P to an undefined byte less than or equal to END. - - We don't handle duplicates properly (yet). */ - -static boolean -group_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - /* Point to after the args to the start_memory. */ - unsigned char *p1 = *p + 2; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and return true or - false, as appropriate, when we get to one that can't, or to the - matching stop_memory. */ - - switch ((re_opcode_t) *p1) - { - /* Could be either a loop or a series of alternatives. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - /* If the next operation is not a jump backwards in the - pattern. */ - - if (mcnt >= 0) - { - /* Go through the on_failure_jumps of the alternatives, - seeing if any of the alternatives cannot match nothing. 
- The last alternative starts with only a jump, - whereas the rest start with on_failure_jump and end - with a jump, e.g., here is the pattern for `a|b|c': - - /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 - /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 - /exactn/1/c - - So, we have to first go through the first (n-1) - alternatives and then deal with the last one separately. */ - - - /* Deal with the first (n-1) alternatives, which start - with an on_failure_jump (see above) that jumps to right - past a jump_past_alt. */ - - while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) - { - /* `mcnt' holds how many bytes long the alternative - is, including the ending `jump_past_alt' and - its number. */ - - if (!alt_match_null_string_p (p1, p1 + mcnt - 3, - reg_info)) - return false; - - /* Move to right after this alternative, including the - jump_past_alt. */ - p1 += mcnt; - - /* Break if it's the beginning of an n-th alternative - that doesn't begin with an on_failure_jump. */ - if ((re_opcode_t) *p1 != on_failure_jump) - break; - - /* Still have to check that it's not an n-th - alternative that starts with an on_failure_jump. */ - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) - { - /* Get to the beginning of the n-th alternative. */ - p1 -= 3; - break; - } - } - - /* Deal with the last alternative: go back and get number - of the `jump_past_alt' just before it. `mcnt' contains - the length of the alternative. */ - EXTRACT_NUMBER (mcnt, p1 - 2); - - if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) - return false; - - p1 += mcnt; /* Get past the n-th alternative. 
*/ - } /* if mcnt > 0 */ - break; - - - case stop_memory: - assert (p1[1] == **p); - *p = p1 + 2; - return true; - - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return false; -} /* group_match_null_string_p */ - - -/* Similar to group_match_null_string_p, but doesn't deal with alternatives: - It expects P to be the first byte of a single alternative and END one - byte past the last. The alternative can contain groups. */ - -static boolean -alt_match_null_string_p (p, end, reg_info) - unsigned char *p, *end; - register_info_type *reg_info; -{ - int mcnt; - unsigned char *p1 = p; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and break when we get - to one that can't. */ - - switch ((re_opcode_t) *p1) - { - /* It's a loop. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - break; - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return true; -} /* alt_match_null_string_p */ - - -/* Deals with the ops common to group_match_null_string_p and - alt_match_null_string_p. - - Sets P to one after the op and its arguments, if any. */ - -static boolean -common_op_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - boolean ret; - int reg_no; - unsigned char *p1 = *p; - - switch ((re_opcode_t) *p1++) - { - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbeg: - case wordend: - case wordbound: - case notwordbound: -#ifdef emacs - case before_dot: - case at_dot: - case after_dot: +# endif #endif - break; - - case start_memory: - reg_no = *p1; - assert (reg_no > 0 && reg_no <= MAX_REGNUM); - ret = group_match_null_string_p (&p1, end, reg_info); - - /* Have to set this here in case we're checking a group which - contains a group and a back reference to it. 
*/ - - if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; - - if (!ret) - return false; - break; - - /* If this is an optimized succeed_n for zero times, make the jump. */ - case jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (mcnt >= 0) - p1 += mcnt; - else - return false; - break; - - case succeed_n: - /* Get to the number of times to succeed. */ - p1 += 2; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - if (mcnt == 0) - { - p1 -= 4; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - } - else - return false; - break; - - case duplicate: - if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) - return false; - break; - - case set_number_at: - p1 += 4; - - default: - /* All other opcodes mean we cannot match the empty string. */ - return false; - } - - *p = p1; - return true; -} /* common_op_match_null_string_p */ - - -/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN - bytes; nonzero otherwise. */ - -static int -bcmp_translate( - unsigned char *s1, - unsigned char *s2, - int len, - char *translate -) -{ - register unsigned char *p1 = s1, *p2 = s2; - while (len) - { - if (translate[*p1++] != translate[*p2++]) return 1; - len--; - } - return 0; -} - -/* Entry points for GNU code. */ - -/* re_compile_pattern is the GNU regular expression compiler: it - compiles PATTERN (of length SIZE) and puts the result in BUFP. - Returns 0 if the pattern was valid, otherwise an error string. - - Assumes the `allocated' (and perhaps `buffer') and `translate' fields - are set in BUFP on entry. - - We call regex_compile to do the actual compilation. */ - -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - int length; - struct re_pattern_buffer *bufp; -{ - reg_errcode_t ret; - - /* GNU code is written to assume at least RE_NREGS registers will be set - (and at least one extra will be -1). 
*/ - bufp->regs_allocated = REGS_UNALLOCATED; - - /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by - setting no_sub. */ - bufp->no_sub = 0; - - /* Match anchors at newline. */ - bufp->newline_anchor = 1; - - ret = regex_compile (pattern, length, re_syntax_options, bufp); - - return re_error_msg[(int) ret]; -} - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them if this is an Emacs or POSIX compilation. */ - -#if !defined (emacs) && !defined (_POSIX_SOURCE) - -/* BSD has one and only one pattern buffer. */ -static struct re_pattern_buffer re_comp_buf; - -char * -re_comp (s) - const char *s; -{ - reg_errcode_t ret; - - if (!s) - { - if (!re_comp_buf.buffer) - return "No previous regular expression"; - return 0; - } - - if (!re_comp_buf.buffer) - { - re_comp_buf.buffer = (unsigned char *) malloc (200); - if (re_comp_buf.buffer == NULL) - return "Memory exhausted"; - re_comp_buf.allocated = 200; - - re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); - if (re_comp_buf.fastmap == NULL) - return "Memory exhausted"; - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - /* Match anchors at newlines. */ - re_comp_buf.newline_anchor = 1; - - ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); - - /* Yes, we're discarding `const' here. */ - return (char *) re_error_msg[(int) ret]; -} - - -int -re_exec (s) - const char *s; -{ - const int len = strlen (s); - return - 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); -} -#endif /* not emacs and not _POSIX_SOURCE */ - -/* POSIX.2 functions. Don't define these for Emacs. */ - -#ifndef emacs - -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. 
Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' and `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. - - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. - - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for - the return codes and their meanings.) */ - -int -regcomp (preg, pattern, cflags) - regex_t *preg; - const char *pattern; - int cflags; -{ - reg_errcode_t ret; - unsigned syntax - = (cflags & REG_EXTENDED) ? - RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; - - /* regex_compile will allocate the space for the compiled pattern. */ - preg->buffer = 0; - preg->allocated = 0; - - /* Don't bother to use a fastmap when searching. This simplifies the - REG_NEWLINE case: if we used a fastmap, we'd have to put all the - characters after newlines into the fastmap. This way, we just try - every character. */ - preg->fastmap = 0; - - if (cflags & REG_ICASE) - { - unsigned i; - - preg->translate = (char *) malloc (CHAR_SET_SIZE); - if (preg->translate == NULL) - return (int) REG_ESPACE; - - /* Map uppercase characters to corresponding lowercase ones. */ - for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = ISUPPER (i) ? 
tolower (i) : i; - } - else - preg->translate = NULL; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - /* It also changes the matching behavior. */ - preg->newline_anchor = 1; - } - else - preg->newline_anchor = 0; - - preg->no_sub = !!(cflags & REG_NOSUB); - - /* POSIX says a null character in the pattern terminates it, so we - can use strlen here in compiling the pattern. */ - ret = regex_compile (pattern, strlen (pattern), syntax, preg); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) ret = REG_EPAREN; - - return (int) ret; -} - - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *preg; - const char *string; - size_t nmatch; - regmatch_t pmatch[]; - int eflags; -{ - int ret; - struct re_registers regs; - regex_t private_preg; - int len = strlen (string); - boolean want_reg_info = !preg->no_sub && nmatch > 0; - - private_preg = *preg; - - private_preg.not_bol = !!(eflags & REG_NOTBOL); - private_preg.not_eol = !!(eflags & REG_NOTEOL); - - /* The user has told us exactly how many registers to return - information about, via `nmatch'. We have to pass that on to the - matching routines. 
*/ - private_preg.regs_allocated = REGS_FIXED; - - if (want_reg_info) - { - regs.num_regs = nmatch; - regs.start = TALLOC (nmatch, regoff_t); - regs.end = TALLOC (nmatch, regoff_t); - if (regs.start == NULL || regs.end == NULL) - return (int) REG_NOMATCH; - } - - /* Perform the searching operation. */ - ret = re_search (&private_preg, string, len, - /* start: */ 0, /* range: */ len, - want_reg_info ? &regs : (struct re_registers *) 0); - - /* Copy the register information to the POSIX structure. */ - if (want_reg_info) - { - if (ret >= 0) - { - unsigned r; - - for (r = 0; r < nmatch; r++) - { - pmatch[r].rm_so = regs.start[r]; - pmatch[r].rm_eo = regs.end[r]; - } - } - - /* If we needed the temporary register info, free the space now. */ - free (regs.start); - free (regs.end); - } - - /* We want zero return to mean success, unlike `re_search'. */ - return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; -} - - -/* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. We don't use PREG here. */ - -size_t -regerror(int errcode, const regex_t *preg, - char *errbuf, size_t errbuf_size) -{ - const char *msg; - size_t msg_size; - - if (errcode < 0 - || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) - /* Only error codes returned by the rest of the code should be passed - to this routine. If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = re_error_msg[errcode]; - - /* POSIX doesn't require that we do anything in this case, but why - not be nice. */ - if (! msg) - msg = "Success"; - - msg_size = strlen (msg) + 1; /* Includes the null. */ - - if (errbuf_size != 0) - { - if (msg_size > errbuf_size) - { - strncpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; - } - else - strcpy (errbuf, msg); - } - - return msg_size; -} - - -/* Free dynamically allocated space used by PREG.
*/ - -void -regfree (preg) - regex_t *preg; -{ - if (preg->buffer != NULL) - free (preg->buffer); - preg->buffer = NULL; - - preg->allocated = 0; - preg->used = 0; - - if (preg->fastmap != NULL) - free (preg->fastmap); - preg->fastmap = NULL; - preg->fastmap_accurate = 0; - - if (preg->translate != NULL) - free (preg->translate); - preg->translate = NULL; -} - -#endif /* not emacs */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/compat/regex/regex.h b/compat/regex/regex.h index 6eb64f1402..61c9683872 100644 --- a/compat/regex/regex.h +++ b/compat/regex/regex.h @@ -1,70 +1,90 @@ -/* Definitions for data structures and routines for the regular - expression library, version 0.12. +#include <stdio.h> +#include <stddef.h> - Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. +/* Definitions for data structures and routines for the regular + expression library. + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008 + Free Software Foundation, Inc. + This file is part of the GNU C Library. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#ifndef __REGEXP_LIBRARY_H__ -#define __REGEXP_LIBRARY_H__ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ -/* POSIX says that <sys/types.h> must be included (by the caller) before - <regex.h>. */ +#ifndef _REGEX_H +#define _REGEX_H 1 -#ifdef VMS -/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it - should be there. */ +#ifdef HAVE_STDDEF_H #include <stddef.h> #endif +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +#ifndef _LIBC +#define __USE_GNU 1 +#endif + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; /* The following bits are used to determine the regexp syntax we recognize. The set/not-set meanings are chosen so that Emacs syntax remains the value 0. The bits are given in alphabetical order, and the definitions shifted by one from the previous bit; thus, when we add or remove a bit, only one other definition need change. */ -typedef unsigned reg_syntax_t; +typedef unsigned long int reg_syntax_t; +#ifdef __USE_GNU /* If this bit is not set, then \ inside a bracket expression is literal. If set, then such a \ quotes the following character. 
*/ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) +# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) /* If this bit is not set, then + and ? are operators, and \+ and \? are literals. If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) +# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) /* If this bit is set, then character classes are supported. They are: [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) +# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) /* If this bit is set, then ^ and $ are always anchors (outside bracket expressions, of course). If this bit is not set, then it depends: - ^ is an anchor if it is at the beginning of a regular - expression or after an open-group or an alternation operator; - $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because POSIX draft 11.2 says that * etc. in leading positions is undefined. We already implemented a previous draft which made those constructs invalid, though, so we haven't changed the code back. */ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) +# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) /* If this bit is set, then special characters are always special regardless of where they are in the pattern. @@ -72,63 +92,94 @@ typedef unsigned reg_syntax_t; some contexts; otherwise they are ordinary. Specifically, * + ? 
and intervals are only special when not after the beginning, open-group, or alternation operator. */ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) +# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) /* If this bit is set, then *, +, ?, and { cannot be first in an re or immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) +# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) /* If this bit is set, then . matches newline. If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) +# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) /* If this bit is set, then . doesn't match NUL. If not set, then it does. */ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) +# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) /* If this bit is set, nonmatching lists [^...] do not match newline. If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) +# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) /* If this bit is set, either \{...\} or {...} defines an interval, depending on RE_NO_BK_BRACES. If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) +# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) /* If this bit is set, +, ? and | aren't recognized as operators. If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) +# define RE_LIMITED_OPS (RE_INTERVALS << 1) /* If this bit is set, newline is an alternation operator. If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) +# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) /* If this bit is set, then `{...}' defines an interval, and \{ and \} are literals. If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) +# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) /* If this bit is set, (...) defines a group, and \( and \) are literals. 
If not set, \(...\) defines a group, and ( and ) are literals. */ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) +# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) /* If this bit is set, then \<digit> matches <digit>. If not set, then \<digit> is a back-reference. */ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) +# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) /* If this bit is set, then | is an alternation operator, and \| is literal. If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) +# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) /* If this bit is set, then an ending range point collating higher than the starting range point, as in [z-a], is invalid. If not set, then when ending range point collates higher than the starting range point, the range is ignored. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) +# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) /* If this bit is set, then an unmatched ) is ordinary. If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) +# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +# define RE_INVALID_INTERVAL_ORD (RE_NO_GNU_OPS << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. 
*/ +# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + +/* If this bit is set, then \{ cannot be first in a BRE or + immediately after an alternation or begin-group operator. */ +# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) + +/* If this bit is set, then no_sub will be set to 1 during + re_compile_pattern. */ +#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) +#endif /* This global variable defines the particular regexp syntax to use (for some interfaces). When a regexp is compiled, the syntax used is @@ -136,6 +187,7 @@ typedef unsigned reg_syntax_t; already-compiled regexps. */ extern reg_syntax_t re_syntax_options; +#ifdef __USE_GNU /* Define combinations of the above bits for the standard possibilities. (The [[[ comments delimit what gets put into the Texinfo file, so don't delete them!) 
*/ @@ -143,13 +195,22 @@ extern reg_syntax_t re_syntax_options; #define RE_SYNTAX_EMACS 0 #define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INVALID_INTERVAL_ORD) \ + & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS \ + | RE_INVALID_INTERVAL_ORD) #define RE_SYNTAX_GREP \ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ @@ -163,7 +224,8 @@ extern reg_syntax_t re_syntax_options; | RE_NO_BK_VBAR) #define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC @@ -176,7 +238,7 @@ extern reg_syntax_t re_syntax_options; | RE_INTERVALS | RE_NO_EMPTY_RANGES) #define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. 
Actually, this @@ -185,13 +247,13 @@ extern reg_syntax_t re_syntax_options; (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) #define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. */ #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ @@ -202,10 +264,12 @@ extern reg_syntax_t re_syntax_options; /* Maximum number of duplicates an interval can allow. Some systems (erroneously) define this in other header files, but we want our value, so remove any previous define. */ -#ifdef RE_DUP_MAX -#undef RE_DUP_MAX +# ifdef RE_DUP_MAX +# undef RE_DUP_MAX +# endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +# define RE_DUP_MAX (0x7fff) #endif -#define RE_DUP_MAX ((1 << 15) - 1) /* POSIX `cflags' bits (i.e., information for `regcomp'). */ @@ -240,18 +304,26 @@ extern reg_syntax_t re_syntax_options; /* Like REG_NOTBOL, except for the end-of-line. */ #define REG_NOTEOL (1 << 1) +/* Use PMATCH[0] to delimit the start and end of the search in the + buffer. */ +#define REG_STARTEND (1 << 2) + /* If any error codes are removed, changed, or added, update the `re_error_msg' table in regex.c. */ typedef enum { +#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + REG_NOERROR = 0, /* Success. 
*/ REG_NOMATCH, /* Didn't find a match (for regexec). */ /* POSIX regcomp return error codes. (In the order listed in the standard.) */ REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Not implemented. */ + REG_ECOLLATE, /* Invalid collating element. */ REG_ECTYPE, /* Invalid character class name. */ REG_EESCAPE, /* Trailing backslash. */ REG_ESUBREG, /* Invalid back reference. */ @@ -275,85 +347,92 @@ typedef enum compiled, the `re_nsub' field is available. All other fields are private to the regex routines. */ +#ifndef RE_TRANSLATE_TYPE +# define __RE_TRANSLATE_TYPE unsigned char * +# ifdef __USE_GNU +# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE +# endif +#endif + +#ifdef __USE_GNU +# define __REPB_PREFIX(name) name +#else +# define __REPB_PREFIX(name) __##name +#endif + struct re_pattern_buffer { -/* [[[begin pattern_buffer]]] */ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ + unsigned char *__REPB_PREFIX(buffer); - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; + /* Number of bytes to which `buffer' points. */ + unsigned long int __REPB_PREFIX(allocated); - /* Number of bytes actually used in `buffer'. */ - unsigned long used; + /* Number of bytes actually used in `buffer'. */ + unsigned long int __REPB_PREFIX(used); - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t __REPB_PREFIX(syntax); - /* Pointer to a fastmap, if any, otherwise zero. 
re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ + char *__REPB_PREFIX(fastmap); - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - char *translate; + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ + __RE_TRANSLATE_TYPE __REPB_PREFIX(translate); - /* Number of subexpressions found by the compiler. */ + /* Number of subexpressions found by the compiler. */ size_t re_nsub; - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. 
*/ - unsigned newline_anchor : 1; - -/* [[[end pattern_buffer]]] */ -}; + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ + unsigned __REPB_PREFIX(can_be_null) : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#ifdef __USE_GNU +# define REGS_UNALLOCATED 0 +# define REGS_REALLOCATE 1 +# define REGS_FIXED 2 +#endif + unsigned __REPB_PREFIX(regs_allocated) : 2; -typedef struct re_pattern_buffer regex_t; + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned __REPB_PREFIX(fastmap_accurate) : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned __REPB_PREFIX(no_sub) : 1; + + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ + unsigned __REPB_PREFIX(not_bol) : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned __REPB_PREFIX(not_eol) : 1; + /* If true, an anchor at a newline matches. */ + unsigned __REPB_PREFIX(newline_anchor) : 1; +}; -/* search.c (search_buffer) in Emacs needs this one opcode value. It is - defined both in `regex.c' and here. */ -#define RE_EXACTN_VALUE 1 +typedef struct re_pattern_buffer regex_t; /* Type for byte offsets within the string. POSIX mandates this. */ typedef int regoff_t; +#ifdef __USE_GNU /* This is the structure we store register match data in. See regex.texinfo for a full description of what registers match. 
*/ struct re_registers @@ -367,8 +446,9 @@ struct re_registers /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, `re_match_2' returns information about at least this many registers the first time a `regs' structure is passed. */ -#ifndef RE_NREGS -#define RE_NREGS 30 +# ifndef RE_NREGS +# define RE_NREGS 30 +# endif #endif @@ -383,38 +463,22 @@ typedef struct /* Declarations for routines. */ -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -#define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -#define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - +#ifdef __USE_GNU /* Sets the current default syntax to SYNTAX, and return the old syntax. You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); /* Compile the regular expression PATTERN, with length LENGTH and syntax given by the global `re_syntax_options', into the buffer BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, int length, - struct re_pattern_buffer *buffer)); +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); /* Compile a fastmap for the compiled pattern in BUFFER; used to accelerate searches. Return 0 if successful and -2 if was an internal error. 
*/ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); /* Search in the string STRING (with length LENGTH) for the pattern @@ -422,31 +486,30 @@ extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); characters. Return the starting position of the match, -1 for no match, or -2 for an internal error. Also return register information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); +extern int re_search (struct re_pattern_buffer *__buffer, const char *__cstring, + int __length, int __start, int __range, + struct re_registers *__regs); /* Like `re_search', but search in the concatenation of STRING1 and STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); /* Like `re_search', but return how many characters in STRING the regexp in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); +extern int re_match (struct re_pattern_buffer *__buffer, const char *__cstring, + int __length, int __start, struct re_registers *__regs); /* Relates to `re_match' as `re_search_2' relates to `re_search'. 
*/ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); /* Set REGS to hold NUM_REGS registers, storing them in STARTS and @@ -461,30 +524,59 @@ extern int re_match_2 Unless this function is called, the first search or match using PATTERN_BUFFER will allocate its own register data, without freeing the old data. */ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); - +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); +#endif /* Use GNU */ + +#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD) +# ifndef _CRAY /* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". */ +#ifndef __restrict +# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) +# if defined restrict || 199901L <= __STDC_VERSION__ +# define __restrict restrict +# else +# define __restrict +# endif +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. */ +#ifndef __restrict_arr +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \ + && !defined __GNUG__ +# define __restrict_arr __restrict +# else +# define __restrict_arr +# endif +#endif /* POSIX compatibility. 
*/ -extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); -extern int regexec - _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags)); -extern size_t regerror - _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, - size_t errbuf_size)); -extern void regfree _RE_ARGS ((regex_t *preg)); - -#endif /* not __REGEXP_LIBRARY_H__ */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); + +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __cstring, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); + +extern size_t regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); + +extern void regfree (regex_t *__preg); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ diff --git a/compat/regex/regex_internal.c b/compat/regex/regex_internal.c new file mode 100644 index 0000000000..193854cf5b --- /dev/null +++ b/compat/regex/regex_internal.c @@ -0,0 +1,1744 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +static void re_string_construct_common (const char *str, int len, + re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) internal_function; +static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int hash) internal_function; +static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, + const re_node_set *nodes, + unsigned int context, + unsigned int hash) internal_function; + +#ifdef GAWK +#undef MAX /* safety */ +static int +MAX(size_t a, size_t b) +{ + return (a > b ? a : b); +} +#endif + +/* Functions for string operation. */ + +/* This function allocates the buffers. It is necessary to call + re_string_reconstruct before using the object. */ + +static reg_errcode_t +internal_function +re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + int init_buf_len; + + /* Ensure at least one character fits into the buffers. */ + if (init_len < dfa->mb_cur_max) + init_len = dfa->mb_cur_max; + init_buf_len = (len + 1 < init_len) ? len + 1: init_len; + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + ret = re_string_realloc_buffers (pstr, init_buf_len); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + pstr->word_char = dfa->word_char; + pstr->word_ops_used = dfa->word_ops_used; + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; + pstr->valid_raw_len = pstr->valid_len; + return REG_NOERROR; +} + +/* This function allocates the buffers and initializes them. 
*/ + +static reg_errcode_t +internal_function +re_string_construct (re_string_t *pstr, const char *str, int len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) +{ + reg_errcode_t ret; + memset (pstr, '\0', sizeof (re_string_t)); + re_string_construct_common (str, len, pstr, trans, icase, dfa); + + if (len > 0) + { + ret = re_string_realloc_buffers (pstr, len + 1); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; + + if (icase) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + if (pstr->valid_raw_len >= len) + break; + if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) + break; + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (trans != NULL) + re_string_translate_buffer (pstr); + else + { + pstr->valid_len = pstr->bufs_len; + pstr->valid_raw_len = pstr->bufs_len; + } + } + } + + return REG_NOERROR; +} + +/* Helper functions for re_string_allocate, and re_string_construct. */ + +static reg_errcode_t +internal_function +re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) +{ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + wint_t *new_wcs; + + /* Avoid overflow in realloc. 
*/ + const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int)); + if (BE (SIZE_MAX / max_object_size < new_buf_len, 0)) + return REG_ESPACE; + + new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); + if (BE (new_wcs == NULL, 0)) + return REG_ESPACE; + pstr->wcs = new_wcs; + if (pstr->offsets != NULL) + { + int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len); + if (BE (new_offsets == NULL, 0)) + return REG_ESPACE; + pstr->offsets = new_offsets; + } + } +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + { + unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, + new_buf_len); + if (BE (new_mbs == NULL, 0)) + return REG_ESPACE; + pstr->mbs = new_mbs; + } + pstr->bufs_len = new_buf_len; + return REG_NOERROR; +} + + +static void +internal_function +re_string_construct_common (const char *str, int len, re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) +{ + pstr->raw_mbs = (const unsigned char *) str; + pstr->len = len; + pstr->raw_len = len; + pstr->trans = trans; + pstr->icase = icase ? 1 : 0; + pstr->mbs_allocated = (trans != NULL || icase); + pstr->mb_cur_max = dfa->mb_cur_max; + pstr->is_utf8 = dfa->is_utf8; + pstr->map_notascii = dfa->map_notascii; + pstr->stop = pstr->len; + pstr->raw_stop = pstr->stop; +} + +#ifdef RE_ENABLE_I18N + +/* Build wide character buffer PSTR->WCS. + If the byte sequence of the string are: + <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3> + Then wide character buffer will be: + <wc1> , WEOF , <wc2> , WEOF , <wc3> + We use WEOF for padding, they indicate that the position isn't + a first byte of a multibyte character. + + Note that this function assumes PSTR->VALID_LEN elements are already + built and starts from PSTR->VALID_LEN. 
*/ + +static void +internal_function +build_wcs_buffer (re_string_t *pstr) +{ +#ifdef _LIBC + unsigned char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + unsigned char buf[64]; +#endif + mbstate_t prev_st; + int byte_idx, end_idx, remain_len; + size_t mbclen; + + /* Build the buffers from pstr->valid_len to either pstr->len or + pstr->bufs_len. */ + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + for (byte_idx = pstr->valid_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + /* Apply the translation if we need. */ + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; + buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; + mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2, 0)) + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a singlebyte character. */ + mbclen = 1; + wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + if (BE (pstr->trans != NULL, 0)) + wc = pstr->trans[wc]; + pstr->cur_state = prev_st; + } + + /* Write wide character and padding. */ + pstr->wcs[byte_idx++] = wc; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; +} + +/* Build wide character buffer PSTR->WCS like build_wcs_buffer, + but for REG_ICASE. 
*/ + +static reg_errcode_t +internal_function +build_wcs_upper_buffer (re_string_t *pstr) +{ + mbstate_t prev_st; + int src_idx, byte_idx, end_idx, remain_len; + size_t mbclen; +#ifdef _LIBC + char buf[MB_LEN_MAX]; + assert (MB_LEN_MAX >= pstr->mb_cur_max); +#else + char buf[64]; +#endif + + byte_idx = pstr->valid_len; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + /* The following optimization assumes that ASCII characters can be + mapped to wide characters with a simple cast. */ + if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) + { + while (byte_idx < end_idx) + { + wchar_t wc; + + if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) + && mbsinit (&pstr->cur_state)) + { + /* In case of a singlebyte character. */ + pstr->mbs[byte_idx] + = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); + /* The next step uses the assumption that wchar_t is encoded + ASCII-safe: all ASCII values can be converted like this. */ + pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; + ++byte_idx; + continue; + } + + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + mbclen = __mbrtowc (&wc, + ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + + byte_idx), remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb (buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else + { + src_idx = byte_idx; + goto offsets_needed; + } + } + else + memcpy (pstr->mbs + byte_idx, + pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. 
*/ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + pstr->mbs[byte_idx] = ch; + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = byte_idx; + return REG_NOERROR; + } + else + for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;) + { + wchar_t wc; + const char *p; + offsets_needed: + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + if (BE (pstr->trans != NULL, 0)) + { + int i, ch; + + for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) + { + ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; + buf[i] = pstr->trans[ch]; + } + p = (const char *) buf; + } + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; + mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); + if (BE (mbclen + 2 > 2, 1)) + { + wchar_t wcu = wc; + if (iswlower (wc)) + { + size_t mbcdlen; + + wcu = towupper (wc); + mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); + if (BE (mbclen == mbcdlen, 1)) + memcpy (pstr->mbs + byte_idx, buf, mbclen); + else if (mbcdlen != (size_t) -1) + { + size_t i; + + if (byte_idx + mbcdlen > pstr->bufs_len) + { + pstr->cur_state = prev_st; + break; + } + + if (pstr->offsets == NULL) + { + pstr->offsets = re_malloc (int, pstr->bufs_len); + + if (pstr->offsets == NULL) + return REG_ESPACE; + } + if (!pstr->offsets_needed) + { + for (i = 0; i < (size_t) byte_idx; ++i) + pstr->offsets[i] = i; + pstr->offsets_needed = 1; + } + + memcpy (pstr->mbs + byte_idx, buf, mbcdlen); + pstr->wcs[byte_idx] = wcu; + pstr->offsets[byte_idx] = src_idx; + for (i = 1; i < mbcdlen; ++i) + { + pstr->offsets[byte_idx + i] + = src_idx + (i < mbclen ? 
i : mbclen - 1); + pstr->wcs[byte_idx + i] = WEOF; + } + pstr->len += mbcdlen - mbclen; + if (pstr->raw_stop > src_idx) + pstr->stop += mbcdlen - mbclen; + end_idx = (pstr->bufs_len > pstr->len) + ? pstr->len : pstr->bufs_len; + byte_idx += mbcdlen; + src_idx += mbclen; + continue; + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + } + else + memcpy (pstr->mbs + byte_idx, p, mbclen); + + if (BE (pstr->offsets_needed != 0, 0)) + { + size_t i; + for (i = 0; i < mbclen; ++i) + pstr->offsets[byte_idx + i] = src_idx + i; + } + src_idx += mbclen; + + pstr->wcs[byte_idx++] = wcu; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* It is an invalid character or '\0'. Just use the byte. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; + + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans [ch]; + pstr->mbs[byte_idx] = ch; + + if (BE (pstr->offsets_needed != 0, 0)) + pstr->offsets[byte_idx] = src_idx; + ++src_idx; + + /* And also cast it to wide char. */ + pstr->wcs[byte_idx++] = (wchar_t) ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + } + pstr->valid_len = byte_idx; + pstr->valid_raw_len = src_idx; + return REG_NOERROR; +} + +/* Skip characters until the index becomes greater than NEW_RAW_IDX. + Return the index. */ + +static int +internal_function +re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) +{ + mbstate_t prev_st; + int rawbuf_idx; + size_t mbclen; + wint_t wc = WEOF; + + /* Skip the characters which are not necessary to check. 
*/ + for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len; + rawbuf_idx < new_raw_idx;) + { + wchar_t wc2; + int remain_len = pstr->len - rawbuf_idx; + prev_st = pstr->cur_state; + mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, + remain_len, &pstr->cur_state); + if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) + { + /* We treat these cases as a single byte character. */ + if (mbclen == 0 || remain_len == 0) + wc = L'\0'; + else + wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); + mbclen = 1; + pstr->cur_state = prev_st; + } + else + wc = (wint_t) wc2; + /* Then proceed the next character. */ + rawbuf_idx += mbclen; + } + *last_wc = (wint_t) wc; + return rawbuf_idx; +} +#endif /* RE_ENABLE_I18N */ + +/* Build the buffer PSTR->MBS, and apply the translation if we need. + This function is used in case of REG_ICASE. */ + +static void +internal_function +build_upper_buffer (re_string_t *pstr) +{ + int char_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; + if (BE (pstr->trans != NULL, 0)) + ch = pstr->trans[ch]; + if (islower (ch)) + pstr->mbs[char_idx] = toupper (ch); + else + pstr->mbs[char_idx] = ch; + } + pstr->valid_len = char_idx; + pstr->valid_raw_len = char_idx; +} + +/* Apply TRANS to the buffer in PSTR. */ + +static void +internal_function +re_string_translate_buffer (re_string_t *pstr) +{ + int buf_idx, end_idx; + end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; + + for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) + { + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; + pstr->mbs[buf_idx] = pstr->trans[ch]; + } + + pstr->valid_len = buf_idx; + pstr->valid_raw_len = buf_idx; +} + +/* This function re-construct the buffers. 
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1, + convert to upper case in case of REG_ICASE, apply translation. */ + +static reg_errcode_t +internal_function +re_string_reconstruct (re_string_t *pstr, int idx, int eflags) +{ + int offset = idx - pstr->raw_mbs_idx; + if (BE (offset < 0, 0)) + { + /* Reset buffer. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#endif /* RE_ENABLE_I18N */ + pstr->len = pstr->raw_len; + pstr->stop = pstr->raw_stop; + pstr->valid_len = 0; + pstr->raw_mbs_idx = 0; + pstr->valid_raw_len = 0; + pstr->offsets_needed = 0; + pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (!pstr->mbs_allocated) + pstr->mbs = (unsigned char *) pstr->raw_mbs; + offset = idx; + } + + if (BE (offset != 0, 1)) + { + /* Should the already checked characters be kept? */ + if (BE (offset < pstr->valid_raw_len, 1)) + { + /* Yes, move them to the front of the buffer. */ +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + int low = 0, high = pstr->valid_len, mid; + do + { + mid = (high + low) / 2; + if (pstr->offsets[mid] > offset) + high = mid; + else if (pstr->offsets[mid] < offset) + low = mid + 1; + else + break; + } + while (low < high); + if (pstr->offsets[mid] < offset) + ++mid; + pstr->tip_context = re_string_context_at (pstr, mid - 1, + eflags); + /* This can be quite complicated, so handle specially + only the common and easy case where the character with + different length representation of lower and upper + case is present at or after offset. 
*/ + if (pstr->valid_len > offset + && mid == offset && pstr->offsets[mid] == offset) + { + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); + memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; + for (low = 0; low < pstr->valid_len; low++) + pstr->offsets[low] = pstr->offsets[low + offset] - offset; + } + else + { + /* Otherwise, just find out how long the partial multibyte + character at offset is and fill it with WEOF/255. */ + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + while (mid > 0 && pstr->offsets[mid - 1] == offset) + --mid; + while (mid < pstr->valid_len) + if (pstr->wcs[mid] != WEOF) + break; + else + ++mid; + if (mid == pstr->valid_len) + pstr->valid_len = 0; + else + { + pstr->valid_len = pstr->offsets[mid] - offset; + if (pstr->valid_len) + { + for (low = 0; low < pstr->valid_len; ++low) + pstr->wcs[low] = WEOF; + memset (pstr->mbs, 255, pstr->valid_len); + } + } + pstr->valid_raw_len = pstr->valid_len; + } + } + else +#endif + { + pstr->tip_context = re_string_context_at (pstr, offset - 1, + eflags); +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + memmove (pstr->mbs, pstr->mbs + offset, + pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; +#if DEBUG + assert (pstr->valid_len > 0); +#endif + } + } + else + { +#ifdef RE_ENABLE_I18N + /* No, skip all characters until IDX. 
*/ + int prev_valid_len = pstr->valid_len; + + if (BE (pstr->offsets_needed, 0)) + { + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + } +#endif + pstr->valid_len = 0; +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + int wcs_idx; + wint_t wc = WEOF; + + if (pstr->is_utf8) + { + const unsigned char *raw, *p, *end; + + /* Special case UTF-8. Multi-byte chars start with any + byte other than 0x80 - 0xbf. */ + raw = pstr->raw_mbs + pstr->raw_mbs_idx; + end = raw + (offset - pstr->mb_cur_max); + if (end < pstr->raw_mbs) + end = pstr->raw_mbs; + p = raw + offset - 1; +#ifdef _LIBC + /* We know the wchar_t encoding is UCS4, so for the simple + case, ASCII characters, skip the conversion step. */ + if (isascii (*p) && BE (pstr->trans == NULL, 1)) + { + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + /* pstr->valid_len = 0; */ + wc = (wchar_t) *p; + } + else +#endif + for (; p >= end; --p) + if ((*p & 0xc0) != 0x80) + { + mbstate_t cur_state; + wchar_t wc2; + int mlen = raw + pstr->len - p; + unsigned char buf[6]; + size_t mbclen; + + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + } + /* XXX Don't use mbrtowc, we know which conversion + to use (UTF-8 -> UCS4). */ + memset (&cur_state, 0, sizeof (cur_state)); + mbclen = __mbrtowc (&wc2, (const char *) p, mlen, + &cur_state); + if (raw + offset - p <= mbclen + && mbclen < (size_t) -2) + { + memset (&pstr->cur_state, '\0', + sizeof (mbstate_t)); + pstr->valid_len = mbclen - (raw + offset - p); + wc = wc2; + } + break; + } + } + + if (wc == WEOF) + pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + if (wc == WEOF) + pstr->tip_context + = re_string_context_at (pstr, prev_valid_len - 1, eflags); + else + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) + ? 
CONTEXT_NEWLINE : 0)); + if (BE (pstr->valid_len, 0)) + { + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; + if (pstr->mbs_allocated) + memset (pstr->mbs, 255, pstr->valid_len); + } + pstr->valid_raw_len = pstr->valid_len; + } + else +#endif /* RE_ENABLE_I18N */ + { + int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; + pstr->valid_raw_len = 0; + if (pstr->trans) + c = pstr->trans[c]; + pstr->tip_context = (bitset_contain (pstr->word_char, c) + ? CONTEXT_WORD + : ((IS_NEWLINE (c) && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + } + } + if (!BE (pstr->mbs_allocated, 0)) + pstr->mbs += offset; + } + pstr->raw_mbs_idx = idx; + pstr->len -= offset; + pstr->stop -= offset; + + /* Then build the buffers. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + if (pstr->icase) + { + reg_errcode_t ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else + build_wcs_buffer (pstr); + } + else +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + { + if (pstr->icase) + build_upper_buffer (pstr); + else if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + else + pstr->valid_len = pstr->len; + + pstr->cur_idx = 0; + return REG_NOERROR; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_peek_byte_case (const re_string_t *pstr, int idx) +{ + int ch, off; + + /* Handle the common (easiest) cases first. */ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_peek_byte (pstr, idx); + +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1 + && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) + return re_string_peek_byte (pstr, idx); +#endif + + off = pstr->cur_idx + idx; +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + off = pstr->offsets[off]; +#endif + + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + +#ifdef RE_ENABLE_I18N + /* Ensure that e.g. 
for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I + this function returns CAPITAL LETTER I instead of first byte of + DOTLESS SMALL LETTER I. The latter would confuse the parser, + since peek_byte_case doesn't advance cur_idx in any way. */ + if (pstr->offsets_needed && !isascii (ch)) + return re_string_peek_byte (pstr, idx); +#endif + + return ch; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_fetch_byte_case (re_string_t *pstr) +{ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_fetch_byte (pstr); + +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + { + int off, ch; + + /* For tr_TR.UTF-8 [[:islower:]] there is + [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip + in that case the whole multi-byte character and return + the original letter. On the other side, with + [[: DOTLESS SMALL LETTER I return [[:I, as doing + anything else would complicate things too much. */ + + if (!re_string_first_byte (pstr, pstr->cur_idx)) + return re_string_fetch_byte (pstr); + + off = pstr->offsets[pstr->cur_idx]; + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + + if (! isascii (ch)) + return re_string_fetch_byte (pstr); + + re_string_skip_bytes (pstr, + re_string_char_size_at (pstr, pstr->cur_idx)); + return ch; + } +#endif + + return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; +} + +static void +internal_function +re_string_destruct (re_string_t *pstr) +{ +#ifdef RE_ENABLE_I18N + re_free (pstr->wcs); + re_free (pstr->offsets); +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + re_free (pstr->mbs); +} + +/* Return the context at IDX in INPUT. */ + +static unsigned int +internal_function +re_string_context_at (const re_string_t *input, int idx, int eflags) +{ + int c; + if (BE (idx < 0, 0)) + /* In this case, we use the value stored in input->tip_context, + since we can't know the character in input->mbs[-1] here. */ + return input->tip_context; + if (BE (idx == input->len, 0)) + return ((eflags & REG_NOTEOL) ? 
CONTEXT_ENDBUF + : CONTEXT_NEWLINE | CONTEXT_ENDBUF); +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc; + int wc_idx = idx; + while(input->wcs[wc_idx] == WEOF) + { +#ifdef DEBUG + /* It must not happen. */ + assert (wc_idx >= 0); +#endif + --wc_idx; + if (wc_idx < 0) + return input->tip_context; + } + wc = input->wcs[wc_idx]; + if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) + return CONTEXT_WORD; + return (IS_WIDE_NEWLINE (wc) && input->newline_anchor + ? CONTEXT_NEWLINE : 0); + } + else +#endif + { + c = re_string_byte_at (input, idx); + if (bitset_contain (input->word_char, c)) + return CONTEXT_WORD; + return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; + } +} + +/* Functions for set operation. */ + +static reg_errcode_t +internal_function +re_node_set_alloc (re_node_set *set, int size) +{ + /* + * ADR: valgrind says size can be 0, which then doesn't + * free the block of size 0. Harumph. This seems + * to work ok, though. + */ + if (size == 0) + { + memset(set, 0, sizeof(*set)); + return REG_NOERROR; + } + set->alloc = size; + set->nelem = 0; + set->elems = re_malloc (int, size); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_1 (re_node_set *set, int elem) +{ + set->alloc = 1; + set->nelem = 1; + set->elems = re_malloc (int, 1); + if (BE (set->elems == NULL, 0)) + { + set->alloc = set->nelem = 0; + return REG_ESPACE; + } + set->elems[0] = elem; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_2 (re_node_set *set, int elem1, int elem2) +{ + set->alloc = 2; + set->elems = re_malloc (int, 2); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + if (elem1 == elem2) + { + set->nelem = 1; + set->elems[0] = elem1; + } + else + { + set->nelem = 2; + if (elem1 < elem2) + { + set->elems[0] = elem1; + set->elems[1] = elem2; + } + else + { + set->elems[0] = elem2; + set->elems[1] = 
elem1; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_copy (re_node_set *dest, const re_node_set *src) +{ + dest->nelem = src->nelem; + if (src->nelem > 0) + { + dest->alloc = dest->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + { + dest->alloc = dest->nelem = 0; + return REG_ESPACE; + } + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + } + else + re_node_set_init_empty (dest); + return REG_NOERROR; +} + +/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. + Note: We assume dest->elems is NULL, when dest->alloc is 0. */ + +static reg_errcode_t +internal_function +re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, is, id, delta, sbase; + if (src1->nelem == 0 || src2->nelem == 0) + return REG_NOERROR; + + /* We need dest->nelem + 2 * elems_in_intersection; this is a + conservative estimate. */ + if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) + { + int new_alloc = src1->nelem + src2->nelem + dest->alloc; + int *new_elems = re_realloc (dest->elems, int, new_alloc); + if (BE (new_elems == NULL, 0)) + return REG_ESPACE; + dest->elems = new_elems; + dest->alloc = new_alloc; + } + + /* Find the items in the intersection of SRC1 and SRC2, and copy + into the top of DEST those that are not already in DEST itself. */ + sbase = dest->nelem + src1->nelem + src2->nelem; + i1 = src1->nelem - 1; + i2 = src2->nelem - 1; + id = dest->nelem - 1; + for (;;) + { + if (src1->elems[i1] == src2->elems[i2]) + { + /* Try to find the item in DEST. Maybe we could binary search? */ + while (id >= 0 && dest->elems[id] > src1->elems[i1]) + --id; + + if (id < 0 || dest->elems[id] != src1->elems[i1]) + dest->elems[--sbase] = src1->elems[i1]; + + if (--i1 < 0 || --i2 < 0) + break; + } + + /* Lower the highest of the two items. 
*/ + else if (src1->elems[i1] < src2->elems[i2]) + { + if (--i2 < 0) + break; + } + else + { + if (--i1 < 0) + break; + } + } + + id = dest->nelem - 1; + is = dest->nelem + src1->nelem + src2->nelem - 1; + delta = is - sbase + 1; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place; this is more or + less the same loop that is in re_node_set_merge. */ + dest->nelem += delta; + if (delta > 0 && id >= 0) + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + break; + } + } + + /* Copy remaining SRC elements. */ + memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int)); + + return REG_NOERROR; +} + +/* Calculate the union set of the sets SRC1 and SRC2. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/

/* DEST is (re)initialised here; its previous contents are not read.
   A NULL or empty SRC1/SRC2 counts as the empty set, in which case DEST
   becomes a copy of the other operand (or empty when both are).  The
   merge below walks both element arrays in step, so it relies on them
   being sorted ascending.  */

static reg_errcode_t
internal_function
re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
                        const re_node_set *src2)
{
  const int have1 = (src1 != NULL && src1->nelem > 0);
  const int have2 = (src2 != NULL && src2->nelem > 0);
  int pos1 = 0, pos2 = 0, out = 0;

  /* At most one operand has elements: nothing to merge.  */
  if (!(have1 && have2))
    {
      if (have1)
        return re_node_set_init_copy (dest, src1);
      if (have2)
        return re_node_set_init_copy (dest, src2);
      re_node_set_init_empty (dest);
      return REG_NOERROR;
    }

  /* Worst case the two sets are disjoint.  */
  dest->alloc = src1->nelem + src2->nelem;
  dest->elems = re_malloc (int, dest->alloc);
  if (BE (dest->elems == NULL, 0))
    return REG_ESPACE;

  /* Emit the smaller head each round; equal heads are emitted once.  */
  while (pos1 < src1->nelem && pos2 < src2->nelem)
    {
      const int lhs = src1->elems[pos1];
      const int rhs = src2->elems[pos2];

      if (lhs > rhs)
        {
          dest->elems[out++] = rhs;
          ++pos2;
        }
      else
        {
          if (lhs == rhs)
            ++pos2;
          dest->elems[out++] = lhs;
          ++pos1;
        }
    }

  /* Exactly one operand may still have a tail; append it wholesale.  */
  if (pos1 < src1->nelem)
    {
      const int rest = src1->nelem - pos1;
      memcpy (dest->elems + out, src1->elems + pos1, rest * sizeof (int));
      out += rest;
    }
  else if (pos2 < src2->nelem)
    {
      const int rest = src2->nelem - pos2;
      memcpy (dest->elems + out, src2->elems + pos2, rest * sizeof (int));
      out += rest;
    }

  dest->nelem = out;
  return REG_NOERROR;
}

/* Calculate the union set of the sets DEST and SRC. And store it to
   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
*/ + +static reg_errcode_t +internal_function +re_node_set_merge (re_node_set *dest, const re_node_set *src) +{ + int is, id, sbase, delta; + if (src == NULL || src->nelem == 0) + return REG_NOERROR; + if (dest->alloc < 2 * src->nelem + dest->nelem) + { + int new_alloc = 2 * (src->nelem + dest->alloc); + int *new_buffer = re_realloc (dest->elems, int, new_alloc); + if (BE (new_buffer == NULL, 0)) + return REG_ESPACE; + dest->elems = new_buffer; + dest->alloc = new_alloc; + } + + if (BE (dest->nelem == 0, 0)) + { + dest->nelem = src->nelem; + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + return REG_NOERROR; + } + + /* Copy into the top of DEST the items of SRC that are not + found in DEST. Maybe we could binary search in DEST? */ + for (sbase = dest->nelem + 2 * src->nelem, + is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; ) + { + if (dest->elems[id] == src->elems[is]) + is--, id--; + else if (dest->elems[id] < src->elems[is]) + dest->elems[--sbase] = src->elems[is--]; + else /* if (dest->elems[id] > src->elems[is]) */ + --id; + } + + if (is >= 0) + { + /* If DEST is exhausted, the remaining items of SRC must be unique. */ + sbase -= is + 1; + memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int)); + } + + id = dest->nelem - 1; + is = dest->nelem + 2 * src->nelem - 1; + delta = is - sbase + 1; + if (delta == 0) + return REG_NOERROR; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place. */ + dest->nelem += delta; + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + { + /* Copy remaining SRC elements. 
*/ + memcpy (dest->elems, dest->elems + sbase, + delta * sizeof (int)); + break; + } + } + } + + return REG_NOERROR; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have ELEM. + return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert (re_node_set *set, int elem) +{ + int idx; + /* In case the set is empty. */ + if (set->alloc == 0) + { + if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) + return 1; + else + return -1; + } + + if (BE (set->nelem, 0) == 0) + { + /* We already guaranteed above that set->alloc != 0. */ + set->elems[0] = elem; + ++set->nelem; + return 1; + } + + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = set->alloc * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Move the elements which follows the new element. Test the + first element separately to skip a check in the inner loop. */ + if (elem < set->elems[0]) + { + idx = 0; + for (idx = set->nelem; idx > 0; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + else + { + for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + + /* Insert the new element. */ + set->elems[idx] = elem; + ++set->nelem; + return 1; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have any element greater than or equal to ELEM. + Return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert_last (re_node_set *set, int elem) +{ + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = (set->alloc + 1) * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Insert the new element. 
*/ + set->elems[set->nelem++] = elem; + return 1; +} + +/* Compare two node sets SET1 and SET2. + return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_compare (const re_node_set *set1, const re_node_set *set2) +{ + int i; + if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) + return 0; + for (i = set1->nelem ; --i >= 0 ; ) + if (set1->elems[i] != set2->elems[i]) + return 0; + return 1; +} + +/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_contains (const re_node_set *set, int elem) +{ + unsigned int idx, right, mid; + if (set->nelem <= 0) + return 0; + + /* Binary search the element. */ + idx = 0; + right = set->nelem - 1; + while (idx < right) + { + mid = (idx + right) / 2; + if (set->elems[mid] < elem) + idx = mid + 1; + else + right = mid; + } + return set->elems[idx] == elem ? idx + 1 : 0; +} + +static void +internal_function +re_node_set_remove_at (re_node_set *set, int idx) +{ + if (idx < 0 || idx >= set->nelem) + return; + --set->nelem; + for (; idx < set->nelem; idx++) + set->elems[idx] = set->elems[idx + 1]; +} + + +/* Add the token TOKEN to dfa->nodes, and return the index of the token. + Or return -1, if an error will be occured. */ + +static int +internal_function +re_dfa_add_node (re_dfa_t *dfa, re_token_t token) +{ + if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) + { + size_t new_nodes_alloc = dfa->nodes_alloc * 2; + int *new_nexts, *new_indices; + re_node_set *new_edests, *new_eclosures; + re_token_t *new_nodes; + + /* Avoid overflows in realloc. 
*/ + const size_t max_object_size = MAX (sizeof (re_token_t), + MAX (sizeof (re_node_set), + sizeof (int))); + if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0)) + return -1; + + new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); + if (BE (new_nodes == NULL, 0)) + return -1; + dfa->nodes = new_nodes; + new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); + new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); + new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); + new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); + if (BE (new_nexts == NULL || new_indices == NULL + || new_edests == NULL || new_eclosures == NULL, 0)) + return -1; + dfa->nexts = new_nexts; + dfa->org_indices = new_indices; + dfa->edests = new_edests; + dfa->eclosures = new_eclosures; + dfa->nodes_alloc = new_nodes_alloc; + } + dfa->nodes[dfa->nodes_len] = token; + dfa->nodes[dfa->nodes_len].constraint = 0; +#ifdef RE_ENABLE_I18N + dfa->nodes[dfa->nodes_len].accept_mb = + (token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET; +#endif + dfa->nexts[dfa->nodes_len] = -1; + re_node_set_init_empty (dfa->edests + dfa->nodes_len); + re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); + return dfa->nodes_len++; +} + +static inline unsigned int +internal_function +calc_state_hash (const re_node_set *nodes, unsigned int context) +{ + unsigned int hash = nodes->nelem + context; + int i; + for (i = 0 ; i < nodes->nelem ; i++) + hash += nodes->elems[i]; + return hash; +} + +/* Search for the state whose node_set is equivalent to NODES. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. 
+ - We never return non-NULL value in case of any errors, it is for + optimization. */ + +static re_dfastate_t * +internal_function +re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, + const re_node_set *nodes) +{ + unsigned int hash; + re_dfastate_t *new_state; + struct re_state_table_entry *spot; + int i; + if (BE (nodes->nelem == 0, 0)) + { + *err = REG_NOERROR; + return NULL; + } + hash = calc_state_hash (nodes, 0); + spot = dfa->state_table + (hash & dfa->state_hash_mask); + + for (i = 0 ; i < spot->num ; i++) + { + re_dfastate_t *state = spot->array[i]; + if (hash != state->hash) + continue; + if (re_node_set_compare (&state->nodes, nodes)) + return state; + } + + /* There are no appropriate state in the dfa, create the new one. */ + new_state = create_ci_newstate (dfa, nodes, hash); + if (BE (new_state == NULL, 0)) + *err = REG_ESPACE; + + return new_state; +} + +/* Search for the state whose node_set is equivalent to NODES and + whose context is equivalent to CONTEXT. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. 
*/

/* Lookup compares NODES against each candidate's entrance_nodes, and the
   candidate's stored context against CONTEXT.  */

static re_dfastate_t *
internal_function
re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
                          const re_node_set *nodes, unsigned int context)
{
  const struct re_state_table_entry *bucket;
  re_dfastate_t *created;
  unsigned int key;
  int slot;

  if (nodes->nelem == 0)
    {
      *err = REG_NOERROR;
      return NULL;
    }

  key = calc_state_hash (nodes, context);
  bucket = &dfa->state_table[key & dfa->state_hash_mask];

  for (slot = 0; slot < bucket->num; ++slot)
    {
      re_dfastate_t *candidate = bucket->array[slot];

      /* Reject on the cheap scalar fields before comparing the sets.  */
      if (candidate->hash != key)
        continue;
      if (candidate->context != context)
        continue;
      if (re_node_set_compare (candidate->entrance_nodes, nodes))
        return candidate;
    }

  /* There are no appropriate state in `dfa', create the new one.  */
  created = create_cd_newstate (dfa, nodes, context, key);
  if (BE (created == NULL, 0))
    *err = REG_ESPACE;
  return created;
}

/* Finish initialization of the new state NEWSTATE, and using its hash value
   HASH put in the appropriate bucket of DFA's state table.  Return value
   indicates the error code if failed.
*/ + +static reg_errcode_t +register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, + unsigned int hash) +{ + struct re_state_table_entry *spot; + reg_errcode_t err; + int i; + + newstate->hash = hash; + err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + for (i = 0; i < newstate->nodes.nelem; i++) + { + int elem = newstate->nodes.elems[i]; + if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) + if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0) + return REG_ESPACE; + } + + spot = dfa->state_table + (hash & dfa->state_hash_mask); + if (BE (spot->alloc <= spot->num, 0)) + { + int new_alloc = 2 * spot->num + 2; + re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *, + new_alloc); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + spot->array = new_array; + spot->alloc = new_alloc; + } + spot->array[spot->num++] = newstate; + return REG_NOERROR; +} + +static void +free_state (re_dfastate_t *state) +{ + re_node_set_free (&state->non_eps_nodes); + re_node_set_free (&state->inveclosure); + if (state->entrance_nodes != &state->nodes) + { + re_node_set_free (state->entrance_nodes); + re_free (state->entrance_nodes); + } + re_node_set_free (&state->nodes); + re_free (state->word_trtable); + re_free (state->trtable); + re_free (state); +} + +/* Create the new state which is independ of contexts. + Return the new state if succeeded, otherwise return NULL. 
*/ + +static re_dfastate_t * +internal_function +create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int hash) +{ + int i; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->entrance_nodes = &newstate->nodes; + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + if (type == CHARACTER && !node->constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + else if (type == ANCHOR || node->constraint) + newstate->has_constraint = 1; + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} + +/* Create the new state which is depend on the context CONTEXT. + Return the new state if succeeded, otherwise return NULL. 
*/ + +static re_dfastate_t * +internal_function +create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int context, unsigned int hash) +{ + int i, nctx_nodes = 0; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->context = context; + newstate->entrance_nodes = &newstate->nodes; + + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + if (type == CHARACTER && !constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + + if (constraint) + { + if (newstate->entrance_nodes == &newstate->nodes) + { + newstate->entrance_nodes = re_malloc (re_node_set, 1); + if (BE (newstate->entrance_nodes == NULL, 0)) + { + free_state (newstate); + return NULL; + } + if (re_node_set_init_copy (newstate->entrance_nodes, nodes) + != REG_NOERROR) + return NULL; + nctx_nodes = 0; + newstate->has_constraint = 1; + } + + if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) + { + re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); + ++nctx_nodes; + } + } + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} diff --git a/compat/regex/regex_internal.h b/compat/regex/regex_internal.h new file mode 100644 index 0000000000..4184d7f5a6 --- /dev/null +++ b/compat/regex/regex_internal.h @@ -0,0 +1,810 @@ +/* Extended regular expression matching and 
search library. + Copyright (C) 2002-2005, 2007, 2008, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_INTERNAL_H +#define _REGEX_INTERNAL_H 1 + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +# include <langinfo.h> +#endif +#if defined HAVE_LOCALE_H || defined _LIBC +# include <locale.h> +#endif +#if defined HAVE_WCHAR_H || defined _LIBC +# include <wchar.h> +#endif /* HAVE_WCHAR_H || _LIBC */ +#if defined HAVE_WCTYPE_H || defined _LIBC +# include <wctype.h> +#endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include <stdbool.h> +#endif /* HAVE_STDBOOL_H || _LIBC */ +#if !defined(ZOS_USS) +#if defined HAVE_STDINT_H || defined _LIBC +# include <stdint.h> +#endif /* HAVE_STDINT_H || _LIBC */ +#endif /* !ZOS_USS */ +#if defined _LIBC +# include <bits/libc-lock.h> +#else +# define __libc_lock_define(CLASS,NAME) +# define __libc_lock_init(NAME) do { } while (0) +# define __libc_lock_lock(NAME) do { } while (0) +# define __libc_lock_unlock(NAME) do { } while (0) +#endif + 
+#ifndef GAWK +/* In case that the system doesn't have isblank(). */ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif +#else /* GAWK */ +/* + * This is a freaking mess. On glibc systems you have to define + * a magic constant to get isblank() out of <ctype.h>, since it's + * a C99 function. To heck with all that and borrow a page from + * dfa.c's book. + */ + +static int +is_blank (int c) +{ + return (c == ' ' || c == '\t'); +} +#endif /* GAWK */ + +#ifdef _LIBC +# ifndef _RE_DEFINE_LOCALE_FUNCTIONS +# define _RE_DEFINE_LOCALE_FUNCTIONS 1 +# include <locale/localeinfo.h> +# include <locale/elem-hash.h> +# include <locale/coll-lookup.h> +# endif +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include <libintl.h> +# ifdef _LIBC +# undef gettext +# define gettext(msgid) \ + INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) +# endif +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +# define gettext_noop(String) String +#endif + +/* For loser systems without the definition. */ +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#ifndef NO_MBSUPPORT +#include "mbsupport.h" /* gawk */ +#endif +#ifndef MB_CUR_MAX +#define MB_CUR_MAX 1 +#endif + +#if (defined MBS_SUPPORT) || _LIBC +# define RE_ENABLE_I18N +#endif + +#if __GNUC__ >= 3 +# define BE(expr, val) __builtin_expect (expr, val) +#else +# define BE(expr, val) (expr) +# ifdef inline +# undef inline +# endif +# define inline +#endif + +/* Number of single byte character. */ +#define SBC_MAX 256 + +#define COLL_ELEM_LEN_MAX 8 + +/* The character which represents newline. */ +#define NEWLINE_CHAR '\n' +#define WIDE_NEWLINE_CHAR L'\n' + +/* Rename to standard API for using out of glibc. 
*/ +#ifndef _LIBC +# ifdef __wctype +# undef __wctype +# endif +# define __wctype wctype +# ifdef __iswctype +# undef __iswctype +# endif +# define __iswctype iswctype +# define __btowc btowc +# define __mbrtowc mbrtowc +#undef __mempcpy /* GAWK */ +# define __mempcpy mempcpy +# define __wcrtomb wcrtomb +# define __regfree regfree +# define attribute_hidden +#endif /* not _LIBC */ + +#ifdef __GNUC__ +# define __attribute(arg) __attribute__ (arg) +#else +# define __attribute(arg) +#endif + +extern const char __re_error_msgid[] attribute_hidden; +extern const size_t __re_error_msgid_idx[] attribute_hidden; + +/* An integer used to represent a set of bits. It must be unsigned, + and must be at least as wide as unsigned int. */ +typedef unsigned long int bitset_word_t; +/* All bits set in a bitset_word_t. */ +#define BITSET_WORD_MAX ULONG_MAX +/* Number of bits in a bitset_word_t. */ +#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) +/* Number of bitset_word_t in a bit_set. */ +#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) +typedef bitset_word_t bitset_t[BITSET_WORDS]; +typedef bitset_word_t *re_bitset_ptr_t; +typedef const bitset_word_t *re_const_bitset_ptr_t; + +#define bitset_set(set,i) \ + (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS) +#define bitset_clear(set,i) \ + (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_contain(set,i) \ + (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) +#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) +#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) + +#define PREV_WORD_CONSTRAINT 0x0001 +#define PREV_NOTWORD_CONSTRAINT 0x0002 +#define NEXT_WORD_CONSTRAINT 0x0004 +#define NEXT_NOTWORD_CONSTRAINT 0x0008 +#define PREV_NEWLINE_CONSTRAINT 0x0010 +#define NEXT_NEWLINE_CONSTRAINT 0x0020 +#define PREV_BEGBUF_CONSTRAINT 0x0040 
+#define NEXT_ENDBUF_CONSTRAINT 0x0080 +#define WORD_DELIM_CONSTRAINT 0x0100 +#define NOT_WORD_DELIM_CONSTRAINT 0x0200 + +typedef enum +{ + INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + LINE_FIRST = PREV_NEWLINE_CONSTRAINT, + LINE_LAST = NEXT_NEWLINE_CONSTRAINT, + BUF_FIRST = PREV_BEGBUF_CONSTRAINT, + BUF_LAST = NEXT_ENDBUF_CONSTRAINT, + WORD_DELIM = WORD_DELIM_CONSTRAINT, + NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT +} re_context_type; + +typedef struct +{ + int alloc; + int nelem; + int *elems; +} re_node_set; + +typedef enum +{ + NON_TYPE = 0, + + /* Node type, These are used by token, node, tree. */ + CHARACTER = 1, + END_OF_RE = 2, + SIMPLE_BRACKET = 3, + OP_BACK_REF = 4, + OP_PERIOD = 5, +#ifdef RE_ENABLE_I18N + COMPLEX_BRACKET = 6, + OP_UTF8_PERIOD = 7, +#endif /* RE_ENABLE_I18N */ + + /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used + when the debugger shows values of this enum type. */ +#define EPSILON_BIT 8 + OP_OPEN_SUBEXP = EPSILON_BIT | 0, + OP_CLOSE_SUBEXP = EPSILON_BIT | 1, + OP_ALT = EPSILON_BIT | 2, + OP_DUP_ASTERISK = EPSILON_BIT | 3, + ANCHOR = EPSILON_BIT | 4, + + /* Tree type, these are used only by tree. */ + CONCAT = 16, + SUBEXP = 17, + + /* Token type, these are used only by token. */ + OP_DUP_PLUS = 18, + OP_DUP_QUESTION, + OP_OPEN_BRACKET, + OP_CLOSE_BRACKET, + OP_CHARSET_RANGE, + OP_OPEN_DUP_NUM, + OP_CLOSE_DUP_NUM, + OP_NON_MATCH_LIST, + OP_OPEN_COLL_ELEM, + OP_CLOSE_COLL_ELEM, + OP_OPEN_EQUIV_CLASS, + OP_CLOSE_EQUIV_CLASS, + OP_OPEN_CHAR_CLASS, + OP_CLOSE_CHAR_CLASS, + OP_WORD, + OP_NOTWORD, + OP_SPACE, + OP_NOTSPACE, + BACK_SLASH + +} re_token_type_t; + +#ifdef RE_ENABLE_I18N +typedef struct +{ + /* Multibyte characters. */ + wchar_t *mbchars; + + /* Collating symbols. 
*/ +# ifdef _LIBC + int32_t *coll_syms; +# endif + + /* Equivalence classes. */ +# ifdef _LIBC + int32_t *equiv_classes; +# endif + + /* Range expressions. */ +# ifdef _LIBC + uint32_t *range_starts; + uint32_t *range_ends; +# else /* not _LIBC */ + wchar_t *range_starts; + wchar_t *range_ends; +# endif /* not _LIBC */ + + /* Character classes. */ + wctype_t *char_classes; + + /* If this character set is the non-matching list. */ + unsigned int non_match : 1; + + /* # of multibyte characters. */ + int nmbchars; + + /* # of collating symbols. */ + int ncoll_syms; + + /* # of equivalence classes. */ + int nequiv_classes; + + /* # of range expressions. */ + int nranges; + + /* # of character classes. */ + int nchar_classes; +} re_charset_t; +#endif /* RE_ENABLE_I18N */ + +typedef struct +{ + union + { + unsigned char c; /* for CHARACTER */ + re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; /* for COMPLEX_BRACKET */ +#endif /* RE_ENABLE_I18N */ + int idx; /* for BACK_REF */ + re_context_type ctx_type; /* for ANCHOR */ + } opr; +#if __GNUC__ >= 2 + re_token_type_t type : 8; +#else + re_token_type_t type; +#endif + unsigned int constraint : 10; /* context constraint */ + unsigned int duplicated : 1; + unsigned int opt_subexp : 1; +#ifdef RE_ENABLE_I18N + unsigned int accept_mb : 1; + /* These 2 bits can be moved into the union if needed (e.g. if running out + of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ + unsigned int mb_partial : 1; +#endif + unsigned int word_char : 1; +} re_token_t; + +#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) + +struct re_string_t +{ + /* Indicate the raw buffer which is the original string passed as an + argument of regexec(), re_search(), etc.. */ + const unsigned char *raw_mbs; + /* Store the multibyte string. In case of "case insensitive mode" like + REG_ICASE, upper cases of the string are stored, otherwise MBS points + the same address that RAW_MBS points. 
*/ + unsigned char *mbs; +#ifdef RE_ENABLE_I18N + /* Store the wide character string which is corresponding to MBS. */ + wint_t *wcs; + int *offsets; + mbstate_t cur_state; +#endif + /* Index in RAW_MBS. Each character mbs[i] corresponds to + raw_mbs[raw_mbs_idx + i]. */ + int raw_mbs_idx; + /* The length of the valid characters in the buffers. */ + int valid_len; + /* The corresponding number of bytes in raw_mbs array. */ + int valid_raw_len; + /* The length of the buffers MBS and WCS. */ + int bufs_len; + /* The index in MBS, which is updated by re_string_fetch_byte. */ + int cur_idx; + /* length of RAW_MBS array. */ + int raw_len; + /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ + int len; + /* End of the buffer may be shorter than its length in the cases such + as re_match_2, re_search_2. Then, we use STOP for end of the buffer + instead of LEN. */ + int raw_stop; + /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ + int stop; + + /* The context of mbs[0]. We store the context independently, since + the context of mbs[0] may be different from raw_mbs[0], which is + the beginning of the input string. */ + unsigned int tip_context; + /* The translation passed as a part of an argument of re_compile_pattern. */ + RE_TRANSLATE_TYPE trans; + /* Copy of re_dfa_t's word_char. */ + re_const_bitset_ptr_t word_char; + /* 1 if REG_ICASE. 
*/ + unsigned char icase; + unsigned char is_utf8; + unsigned char map_notascii; + unsigned char mbs_allocated; + unsigned char offsets_needed; + unsigned char newline_anchor; + unsigned char word_ops_used; + int mb_cur_max; +}; +typedef struct re_string_t re_string_t; + + +struct re_dfa_t; +typedef struct re_dfa_t re_dfa_t; + +#ifndef _LIBC +# ifdef __i386__ +# define internal_function __attribute ((regparm (3), stdcall)) +# else +# define internal_function +# endif +#endif + +#ifndef NOT_IN_libc +static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, + int new_buf_len) + internal_function; +# ifdef RE_ENABLE_I18N +static void build_wcs_buffer (re_string_t *pstr) internal_function; +static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr) + internal_function; +# endif /* RE_ENABLE_I18N */ +static void build_upper_buffer (re_string_t *pstr) internal_function; +static void re_string_translate_buffer (re_string_t *pstr) internal_function; +static unsigned int re_string_context_at (const re_string_t *input, int idx, + int eflags) + internal_function __attribute ((pure)); +#endif +#define re_string_peek_byte(pstr, offset) \ + ((pstr)->mbs[(pstr)->cur_idx + offset]) +#define re_string_fetch_byte(pstr) \ + ((pstr)->mbs[(pstr)->cur_idx++]) +#define re_string_first_byte(pstr, idx) \ + ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF) +#define re_string_is_single_byte_char(pstr, idx) \ + ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \ + || (pstr)->wcs[(idx) + 1] != WEOF)) +#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) +#define re_string_cur_idx(pstr) ((pstr)->cur_idx) +#define re_string_get_buffer(pstr) ((pstr)->mbs) +#define re_string_length(pstr) ((pstr)->len) +#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) +#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) +#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) + +#ifndef _LIBC +# if HAVE_ALLOCA +# if (_MSC_VER) +# include 
<malloc.h> +# define __libc_use_alloca(n) 0 +# else +# include <alloca.h> +/* The OS usually guarantees only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + allocate anything larger than 4096 bytes. Also care for the possibility + of a few compiler-allocated temporary stack slots. */ +# define __libc_use_alloca(n) ((n) < 4032) +# endif +# else +/* alloca is implemented with malloc, so just use malloc. */ +# define __libc_use_alloca(n) 0 +# endif +#endif + +#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) +/* SunOS 4.1.x realloc doesn't accept null pointers: pre-Standard C. Sigh. */ +#define re_realloc(p,t,n) ((p != NULL) ? (t *) realloc (p,(n)*sizeof(t)) : (t *) calloc(n,sizeof(t))) +#define re_free(p) free (p) + +struct bin_tree_t +{ + struct bin_tree_t *parent; + struct bin_tree_t *left; + struct bin_tree_t *right; + struct bin_tree_t *first; + struct bin_tree_t *next; + + re_token_t token; + + /* `node_idx' is the index in dfa->nodes, if `type' == 0. + Otherwise `type' indicate the type of this node. 
*/ + int node_idx; +}; +typedef struct bin_tree_t bin_tree_t; + +#define BIN_TREE_STORAGE_SIZE \ + ((1024 - sizeof (void *)) / sizeof (bin_tree_t)) + +struct bin_tree_storage_t +{ + struct bin_tree_storage_t *next; + bin_tree_t data[BIN_TREE_STORAGE_SIZE]; +}; +typedef struct bin_tree_storage_t bin_tree_storage_t; + +#define CONTEXT_WORD 1 +#define CONTEXT_NEWLINE (CONTEXT_WORD << 1) +#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1) +#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1) + +#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD) +#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE) +#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF) +#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF) +#define IS_ORDINARY_CONTEXT(c) ((c) == 0) + +#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') +#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) +#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') +#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) + +#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ + ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\ + || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context))) + +#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \ + ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ + || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \ + || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context))) + +struct re_dfastate_t +{ + unsigned int hash; + re_node_set nodes; + re_node_set non_eps_nodes; + re_node_set inveclosure; + re_node_set *entrance_nodes; + struct re_dfastate_t **trtable, **word_trtable; + unsigned int context : 4; + unsigned int halt : 1; + /* If this state can accept 
`multi byte'. + Note that we refer to multibyte characters, and multi character + collating elements as `multi byte'. */ + unsigned int accept_mb : 1; + /* If this state has backreference node(s). */ + unsigned int has_backref : 1; + unsigned int has_constraint : 1; +}; +typedef struct re_dfastate_t re_dfastate_t; + +struct re_state_table_entry +{ + int num; + int alloc; + re_dfastate_t **array; +}; + +/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ + +typedef struct +{ + int next_idx; + int alloc; + re_dfastate_t **array; +} state_array_t; + +/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ + +typedef struct +{ + int node; + int str_idx; /* The position NODE match at. */ + state_array_t path; +} re_sub_match_last_t; + +/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. + And information about the node, whose type is OP_CLOSE_SUBEXP, + corresponding to NODE is stored in LASTS. */ + +typedef struct +{ + int str_idx; + int node; + state_array_t *path; + int alasts; /* Allocation size of LASTS. */ + int nlasts; /* The number of LASTS. */ + re_sub_match_last_t **lasts; +} re_sub_match_top_t; + +struct re_backref_cache_entry +{ + int node; + int str_idx; + int subexp_from; + int subexp_to; + char more; + char unused; + unsigned short int eps_reachable_subexps_map; +}; + +typedef struct +{ + /* The string object corresponding to the input string. */ + re_string_t input; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + const re_dfa_t *const dfa; +#else + const re_dfa_t *dfa; +#endif + /* EFLAGS of the argument of regexec. */ + int eflags; + /* Where the matching ends. */ + int match_last; + int last_node; + /* The state log used by the matcher. */ + re_dfastate_t **state_log; + int state_log_top; + /* Back reference cache. 
*/ + int nbkref_ents; + int abkref_ents; + struct re_backref_cache_entry *bkref_ents; + int max_mb_elem_len; + int nsub_tops; + int asub_tops; + re_sub_match_top_t **sub_tops; +} re_match_context_t; + +typedef struct +{ + re_dfastate_t **sifted_states; + re_dfastate_t **limited_states; + int last_node; + int last_str_idx; + re_node_set limits; +} re_sift_context_t; + +struct re_fail_stack_ent_t +{ + int idx; + int node; + regmatch_t *regs; + re_node_set eps_via_nodes; +}; + +struct re_fail_stack_t +{ + int num; + int alloc; + struct re_fail_stack_ent_t *stack; +}; + +struct re_dfa_t +{ + re_token_t *nodes; + size_t nodes_alloc; + size_t nodes_len; + int *nexts; + int *org_indices; + re_node_set *edests; + re_node_set *eclosures; + re_node_set *inveclosures; + struct re_state_table_entry *state_table; + re_dfastate_t *init_state; + re_dfastate_t *init_state_word; + re_dfastate_t *init_state_nl; + re_dfastate_t *init_state_begbuf; + bin_tree_t *str_tree; + bin_tree_storage_t *str_tree_storage; + re_bitset_ptr_t sb_char; + int str_tree_storage_idx; + + /* number of subexpressions `re_nsub' is in regex_t. */ + unsigned int state_hash_mask; + int init_node; + int nbackref; /* The number of backreference in this dfa. */ + + /* Bitmap expressing which backreference is used. */ + bitset_word_t used_bkref_map; + bitset_word_t completed_bkref_map; + + unsigned int has_plural_match : 1; + /* If this dfa has "multibyte node", which is a backreference or + a node which can accept multibyte character or multi character + collating element. 
*/ + unsigned int has_mb_node : 1; + unsigned int is_utf8 : 1; + unsigned int map_notascii : 1; + unsigned int word_ops_used : 1; + int mb_cur_max; + bitset_t word_char; + reg_syntax_t syntax; + int *subexp_map; +#ifdef DEBUG + char* re_str; +#endif +#if defined _LIBC + __libc_lock_define (, lock) +#endif +}; + +#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) +#define re_node_set_remove(set,id) \ + (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) +#define re_node_set_empty(p) ((p)->nelem = 0) +#define re_node_set_free(set) re_free ((set)->elems) + + +typedef enum +{ + SB_CHAR, + MB_CHAR, + EQUIV_CLASS, + COLL_SYM, + CHAR_CLASS +} bracket_elem_type; + +typedef struct +{ + bracket_elem_type type; + union + { + unsigned char ch; + unsigned char *name; + wchar_t wch; + } opr; +} bracket_elem_t; + + +/* Inline functions for bitset operation. */ +static inline void +bitset_not (bitset_t set) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + set[bitset_i] = ~set[bitset_i]; +} + +static inline void +bitset_merge (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] |= src[bitset_i]; +} + +static inline void +bitset_mask (bitset_t dest, const bitset_t src) +{ + int bitset_i; + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) + dest[bitset_i] &= src[bitset_i]; +} + +#ifdef RE_ENABLE_I18N +/* Inline functions for re_string. 
*/ +static inline int +internal_function __attribute ((pure)) +re_string_char_size_at (const re_string_t *pstr, int idx) +{ + int byte_idx; + if (pstr->mb_cur_max == 1) + return 1; + for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx) + if (pstr->wcs[idx + byte_idx] != WEOF) + break; + return byte_idx; +} + +static inline wint_t +internal_function __attribute ((pure)) +re_string_wchar_at (const re_string_t *pstr, int idx) +{ + if (pstr->mb_cur_max == 1) + return (wint_t) pstr->mbs[idx]; + return (wint_t) pstr->wcs[idx]; +} + +# ifndef NOT_IN_libc +static int +internal_function __attribute ((pure)) +re_string_elem_size_at (const re_string_t *pstr, int idx) +{ +# ifdef _LIBC + const unsigned char *p, *extra; + const int32_t *table, *indirect; + int32_t tmp; +# include <locale/weight.h> + uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + + if (nrules != 0) + { + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + p = pstr->mbs + idx; + tmp = findidx (&p); + return p - pstr->mbs - idx; + } + else +# endif /* _LIBC */ + return 1; +} +# endif +#endif /* RE_ENABLE_I18N */ + +#endif /* _REGEX_INTERNAL_H */ diff --git a/compat/regex/regexec.c b/compat/regex/regexec.c new file mode 100644 index 0000000000..0194965c5d --- /dev/null +++ b/compat/regex/regexec.c @@ -0,0 +1,4369 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002-2005, 2007, 2009, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, + int n) internal_function; +static void match_ctx_clean (re_match_context_t *mctx) internal_function; +static void match_ctx_free (re_match_context_t *cache) internal_function; +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, + int str_idx, int from, int to) + internal_function; +static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) + internal_function; +static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, + int str_idx) internal_function; +static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, + int node, int str_idx) + internal_function; +static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, + int last_str_idx) + internal_function; +static reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags); +static int re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, + int start, int range, struct 
re_registers *regs, + int stop, int ret_len); +static int re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, struct re_registers *regs, + int ret_len); +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + int nregs, int regs_allocated); +static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx); +static int check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) internal_function; +static int check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) + internal_function; +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, + int cur_idx, int nmatch) internal_function; +static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, + int str_idx, int dest_node, int nregs, + regmatch_t *regs, + re_node_set *eps_via_nodes) + internal_function; +static reg_errcode_t set_regs (const regex_t *preg, + const re_match_context_t *mctx, + size_t nmatch, regmatch_t *pmatch, + int fl_backtrack) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; + +#ifdef RE_ENABLE_I18N +static int sift_states_iter_mb (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, + re_node_set *dest_nodes) + internal_function; +static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, + re_node_set 
*dest_nodes, + const re_node_set *candidates) + internal_function; +static int check_dst_limits (const re_match_context_t *mctx, + re_node_set *limits, + int dst_node, int dst_idx, int src_node, + int src_idx) internal_function; +static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, + int boundaries, int subexp_idx, + int from_node, int bkref_idx) + internal_function; +static int check_dst_limits_calc_pos (const re_match_context_t *mctx, + int limit, int subexp_idx, + int node, int str_idx, + int bkref_idx) internal_function; +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates, + re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, + int str_idx) internal_function; +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, int num) + internal_function; +static re_dfastate_t *find_recover_state (reg_errcode_t *err, + re_match_context_t *mctx) internal_function; +static re_dfastate_t *transit_state (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *state) internal_function; +static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *next_state) + internal_function; +static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, + re_node_set *cur_nodes, + int str_idx) internal_function; +#if 0 +static re_dfastate_t *transit_state_sb (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif +#ifdef RE_ENABLE_I18N +static reg_errcode_t transit_state_mb (re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, + const 
re_node_set *nodes) + internal_function; +static reg_errcode_t get_subexp (re_match_context_t *mctx, + int bkref_node, int bkref_str_idx) + internal_function; +static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, + const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, + int bkref_node, int bkref_str) + internal_function; +static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) internal_function; +static reg_errcode_t check_arrival (re_match_context_t *mctx, + state_array_t *path, int top_node, + int top_str, int last_node, int last_str, + int type) internal_function; +static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, + int str_idx, + re_node_set *cur_nodes, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, + re_node_set *cur_nodes, + int ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, + re_node_set *dst_nodes, + int target, int ex_subexp, + int type) internal_function; +static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, + re_node_set *cur_nodes, int cur_str, + int subexp_num, int type) + internal_function; +static int build_trtable (const re_dfa_t *dfa, + re_dfastate_t *state) internal_function; +#ifdef RE_ENABLE_I18N +static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int idx) + internal_function; +# ifdef _LIBC +static unsigned int find_collation_sequence_value (const unsigned char *mbs, + size_t name_len) + internal_function; +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ +static int group_nodes_into_DFAstates (const re_dfa_t *dfa, + const re_dfastate_t *state, + re_node_set *states_node, + bitset_t *states_ch) internal_function; +static int check_node_accept (const re_match_context_t *mctx, + const re_token_t *node, int idx) + internal_function; +static reg_errcode_t 
extend_buffers (re_match_context_t *mctx) + internal_function; + +/* Entry point for POSIX code. */ + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. */ + +int +regexec ( + const regex_t *__restrict preg, + const char *__restrict string, + size_t nmatch, + regmatch_t pmatch[], + int eflags) +{ + reg_errcode_t err; + int start, length; + + if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) + return REG_BADPAT; + + if (eflags & REG_STARTEND) + { + start = pmatch[0].rm_so; + length = pmatch[0].rm_eo; + } + else + { + start = 0; + length = strlen (string); + } + + __libc_lock_lock (dfa->lock); + if (preg->no_sub) + err = re_search_internal (preg, string, length, start, length - start, + length, 0, NULL, eflags); + else + err = re_search_internal (preg, string, length, start, length - start, + length, nmatch, pmatch, eflags); + __libc_lock_unlock (dfa->lock); + return err != REG_NOERROR; +} + +#ifdef _LIBC +# include <shlib-compat.h> +versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); + +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) +__typeof__ (__regexec) __compat_regexec; + +int +attribute_compat_text_section +__compat_regexec (const regex_t *__restrict preg, + const char *__restrict string, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ + return regexec (preg, string, nmatch, pmatch, + eflags & (REG_NOTBOL | REG_NOTEOL)); +} +compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); +# endif +#endif + +/* Entry points for GNU 
code. */ + +/* re_match, re_search, re_match_2, re_search_2 + + The former two functions operate on STRING with length LENGTH, + while the latter two operate on the concatenation of STRING1 and STRING2 + with lengths LENGTH1 and LENGTH2, respectively. + + re_match() matches the compiled pattern in BUFP against the string, + starting at index START. + + re_search() first tries matching at index START, then it tries to match + starting from index START + 1, and so on. The last start position tried + is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same + way as re_match().) + + The parameter STOP of re_{match,search}_2 specifies that no match exceeding + the first STOP characters of the concatenation of the strings should be + considered. + + If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match + and all groups are stored in REGS. (For the "_2" variants, the offsets are + computed relative to the concatenation, not relative to the individual + strings.) + + On success, re_match* functions return the length of the match, re_search* + return the position of the start of the match. Return value -1 means no + match was found and -2 indicates an internal error.
*/ + +int +re_match (struct re_pattern_buffer *bufp, + const char *string, + int length, + int start, + struct re_registers *regs) +{ + return re_search_stub (bufp, string, length, start, 0, length, regs, 1); +} +#ifdef _LIBC +weak_alias (__re_match, re_match) +#endif + +int +re_search (struct re_pattern_buffer *bufp, + const char *string, + int length, int start, int range, + struct re_registers *regs) +{ + return re_search_stub (bufp, string, length, start, range, length, regs, 0); +} +#ifdef _LIBC +weak_alias (__re_search, re_search) +#endif + +int +re_match_2 (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, int start, + struct re_registers *regs, int stop) +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, 0, regs, stop, 1); +} +#ifdef _LIBC +weak_alias (__re_match_2, re_match_2) +#endif + +int +re_search_2 (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, int start, + int range, struct re_registers *regs, int stop) +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, range, regs, stop, 0); +} +#ifdef _LIBC +weak_alias (__re_search_2, re_search_2) +#endif + +static int +re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, int start, + int range, struct re_registers *regs, + int stop, int ret_len) +{ + const char *str; + int rval; + int len = length1 + length2; + int free_str = 0; + + if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) + return -2; + + /* Concatenate the strings. 
*/ + if (length2 > 0) + if (length1 > 0) + { + char *s = re_malloc (char, len); + + if (BE (s == NULL, 0)) + return -2; + memcpy (s, string1, length1); + memcpy (s + length1, string2, length2); + str = s; + free_str = 1; + } + else + str = string2; + else + str = string1; + + rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len); + if (free_str) + re_free ((char *) str); + return rval; +} + +/* The parameters have the same meaning as those of re_search. + Additional parameters: + If RET_LEN is nonzero the length of the match is returned (re_match style); + otherwise the position of the match is returned. */ + +static int +re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, + struct re_registers *regs, int ret_len) +{ + reg_errcode_t result; + regmatch_t *pmatch; + int nregs, rval; + int eflags = 0; + + /* Check for out-of-range. */ + if (BE (start < 0 || start > length, 0)) + return -1; + if (BE (start + range > length, 0)) + range = length - start; + else if (BE (start + range < 0, 0)) + range = -start; + + __libc_lock_lock (dfa->lock); + + eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; + eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; + + /* Compile fastmap if we haven't yet. */ + if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) + re_compile_fastmap (bufp); + + if (BE (bufp->no_sub, 0)) + regs = NULL; + + /* We need at least 1 register. */ + if (regs == NULL) + nregs = 1; + else if (BE (bufp->regs_allocated == REGS_FIXED && + regs->num_regs < bufp->re_nsub + 1, 0)) + { + nregs = regs->num_regs; + if (BE (nregs < 1, 0)) + { + /* Nothing can be copied to regs. 
*/ + regs = NULL; + nregs = 1; + } + } + else + nregs = bufp->re_nsub + 1; + pmatch = re_malloc (regmatch_t, nregs); + if (BE (pmatch == NULL, 0)) + { + rval = -2; + goto out; + } + + result = re_search_internal (bufp, string, length, start, range, stop, + nregs, pmatch, eflags); + + rval = 0; + + /* I hope we needn't fill ther regs with -1's when no match was found. */ + if (result != REG_NOERROR) + rval = -1; + else if (regs != NULL) + { + /* If caller wants register contents data back, copy them. */ + bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, + bufp->regs_allocated); + if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) + rval = -2; + } + + if (BE (rval == 0, 1)) + { + if (ret_len) + { + assert (pmatch[0].rm_so == start); + rval = pmatch[0].rm_eo - start; + } + else + rval = pmatch[0].rm_so; + } + re_free (pmatch); + out: + __libc_lock_unlock (dfa->lock); + return rval; +} + +static unsigned +re_copy_regs (struct re_registers *regs, + regmatch_t *pmatch, + int nregs, int regs_allocated) +{ + int rval = REGS_REALLOCATE; + int i; + int need_regs = nregs + 1; + /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + uses. */ + + /* Have the register data arrays been allocated? */ + if (regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. */ + regs->start = re_malloc (regoff_t, need_regs); + if (BE (regs->start == NULL, 0)) + return REGS_UNALLOCATED; + regs->end = re_malloc (regoff_t, need_regs); + if (BE (regs->end == NULL, 0)) + { + re_free (regs->start); + return REGS_UNALLOCATED; + } + regs->num_regs = need_regs; + } + else if (regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. 
*/ + if (BE (need_regs > regs->num_regs, 0)) + { + regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); + regoff_t *new_end; + if (BE (new_start == NULL, 0)) + return REGS_UNALLOCATED; + new_end = re_realloc (regs->end, regoff_t, need_regs); + if (BE (new_end == NULL, 0)) + { + re_free (new_start); + return REGS_UNALLOCATED; + } + regs->start = new_start; + regs->end = new_end; + regs->num_regs = need_regs; + } + } + else + { + assert (regs_allocated == REGS_FIXED); + /* This function may not be called with REGS_FIXED and nregs too big. */ + assert (regs->num_regs >= nregs); + rval = REGS_FIXED; + } + + /* Copy the regs. */ + for (i = 0; i < nregs; ++i) + { + regs->start[i] = pmatch[i].rm_so; + regs->end[i] = pmatch[i].rm_eo; + } + for ( ; i < regs->num_regs; ++i) + regs->start[i] = regs->end[i] = -1; + + return rval; +} + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (struct re_pattern_buffer *bufp, + struct re_registers *regs, + unsigned num_regs, + regoff_t *starts, + regoff_t *ends) +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t *) 0; + } +} +#ifdef _LIBC +weak_alias (__re_set_registers, re_set_registers) +#endif + +/* Entry points compatible with 4.2 BSD regex library. 
We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC +int +# ifdef _LIBC +weak_function +# endif +re_exec (s) + const char *s; +{ + return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); +} +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. */ + +/* Searches for a compiled pattern PREG in the string STRING, whose + length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same + mingings with regexec. START, and RANGE have the same meanings + with re_search. + Return REG_NOERROR if we find a match, and REG_NOMATCH if not, + otherwise return the error code. + Note: We assume front end functions already check ranges. + (START + RANGE >= 0 && START + RANGE <= LENGTH) */ + +static reg_errcode_t +re_search_internal (const regex_t *preg, + const char *string, + int length, int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags) +{ + reg_errcode_t err; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int left_lim, right_lim, incr; + int fl_longest_match, match_first, match_kind, match_last = -1; + int extra_nmatch; + int sb, ch; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + re_match_context_t mctx = { .dfa = dfa }; +#else + re_match_context_t mctx; +#endif + char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate + && range && !preg->can_be_null) ? preg->fastmap : NULL; + RE_TRANSLATE_TYPE t = preg->translate; + +#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) + memset (&mctx, '\0', sizeof (re_match_context_t)); + mctx.dfa = dfa; +#endif + + extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; + nmatch -= extra_nmatch; + + /* Check if the DFA haven't been compiled. 
*/ + if (BE (preg->used == 0 || dfa->init_state == NULL + || dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return REG_NOMATCH; + +#ifdef DEBUG + /* We assume front-end functions already check them. */ + assert (start + range >= 0 && start + range <= length); +#endif + + /* If initial states with non-begbuf contexts have no elements, + the regex must be anchored. If preg->newline_anchor is set, + we'll never use init_state_nl, so do not check it. */ + if (dfa->init_state->nodes.nelem == 0 + && dfa->init_state_word->nodes.nelem == 0 + && (dfa->init_state_nl->nodes.nelem == 0 + || !preg->newline_anchor)) + { + if (start != 0 && start + range != 0) + return REG_NOMATCH; + start = range = 0; + } + + /* We must check the longest matching, if nmatch > 0. */ + fl_longest_match = (nmatch != 0 || dfa->nbackref); + + err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, + preg->translate, preg->syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + mctx.input.stop = stop; + mctx.input.raw_stop = stop; + mctx.input.newline_anchor = preg->newline_anchor; + + err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* We will log all the DFA states through which the dfa pass, + if nmatch > 1, or this dfa has "multibyte node", which is a + back-reference or a node which can accept multibyte character or + multi character collating element. */ + if (nmatch > 1 || dfa->has_mb_node) + { + /* Avoid overflow. */ + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) + { + err = REG_ESPACE; + goto free_return; + } + + mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); + if (BE (mctx.state_log == NULL, 0)) + { + err = REG_ESPACE; + goto free_return; + } + } + else + mctx.state_log = NULL; + + match_first = start; + mctx.input.tip_context = (eflags & REG_NOTBOL) ? 
CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF; + + /* Check incrementally whether of not the input string match. */ + incr = (range < 0) ? -1 : 1; + left_lim = (range < 0) ? start + range : start; + right_lim = (range < 0) ? start : start + range; + sb = dfa->mb_cur_max == 1; + match_kind = + (fastmap + ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) + | (range >= 0 ? 2 : 0) + | (t != NULL ? 1 : 0)) + : 8); + + for (;; match_first += incr) + { + err = REG_NOMATCH; + if (match_first < left_lim || right_lim < match_first) + goto free_return; + + /* Advance as rapidly as possible through the string, until we + find a plausible place to start matching. This may be done + with varying efficiency, so there are various possibilities: + only the most common of them are specialized, in order to + save on code size. We use a switch statement for speed. */ + switch (match_kind) + { + case 8: + /* No fastmap. */ + break; + + case 7: + /* Fastmap with single-byte translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[t[(unsigned char) string[match_first]]]) + ++match_first; + goto forward_match_found_start_or_reached_end; + + case 6: + /* Fastmap without translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[(unsigned char) string[match_first]]) + ++match_first; + + forward_match_found_start_or_reached_end: + if (BE (match_first == right_lim, 0)) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (!fastmap[t ? t[ch] : ch]) + goto free_return; + } + break; + + case 4: + case 5: + /* Fastmap without multi-byte translation, match backwards. */ + while (match_first >= left_lim) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (fastmap[t ? 
t[ch] : ch]) + break; + --match_first; + } + if (match_first < left_lim) + goto free_return; + break; + + default: + /* In this case, we can't determine easily the current byte, + since it might be a component byte of a multibyte + character. Then we use the constructed buffer instead. */ + for (;;) + { + /* If MATCH_FIRST is out of the valid range, reconstruct the + buffers. */ + unsigned int offset = match_first - mctx.input.raw_mbs_idx; + if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) + { + err = re_string_reconstruct (&mctx.input, match_first, + eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + offset = match_first - mctx.input.raw_mbs_idx; + } + /* If MATCH_FIRST is out of the buffer, leave it as '\0'. + Note that MATCH_FIRST must not be smaller than 0. */ + ch = (match_first >= length + ? 0 : re_string_byte_at (&mctx.input, offset)); + if (fastmap[ch]) + break; + match_first += incr; + if (match_first < left_lim || match_first > right_lim) + { + err = REG_NOMATCH; + goto free_return; + } + } + break; + } + + /* Reconstruct the buffers so that the matcher can assume that + the matching starts from the beginning of the buffer. */ + err = re_string_reconstruct (&mctx.input, match_first, eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + +#ifdef RE_ENABLE_I18N + /* Don't consider this char as a possible match start if it part, + yet isn't the head, of a multibyte character. */ + if (!sb && !re_string_first_byte (&mctx.input, 0)) + continue; +#endif + + /* It seems to be appropriate one, then use the matcher. */ + /* We assume that the matching starts from 0. */ + mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; + match_last = check_matching (&mctx, fl_longest_match, + range >= 0 ? 
&match_first : NULL); + if (match_last != -1) + { + if (BE (match_last == -2, 0)) + { + err = REG_ESPACE; + goto free_return; + } + else + { + mctx.match_last = match_last; + if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) + { + re_dfastate_t *pstate = mctx.state_log[match_last]; + mctx.last_node = check_halt_state_context (&mctx, pstate, + match_last); + } + if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) + || dfa->nbackref) + { + err = prune_impossible_nodes (&mctx); + if (err == REG_NOERROR) + break; + if (BE (err != REG_NOMATCH, 0)) + goto free_return; + match_last = -1; + } + else + break; /* We found a match. */ + } + } + + match_ctx_clean (&mctx); + } + +#ifdef DEBUG + assert (match_last != -1); + assert (err == REG_NOERROR); +#endif + + /* Set pmatch[] if we need. */ + if (nmatch > 0) + { + int reg_idx; + + /* Initialize registers. */ + for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) + pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; + + /* Set the points where matching start/end. */ + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = mctx.match_last; + + if (!preg->no_sub && nmatch > 1) + { + err = set_regs (preg, &mctx, nmatch, pmatch, + dfa->has_plural_match && dfa->nbackref > 0); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* At last, add the offset to the each registers, since we slided + the buffers so that we could assume that the matching starts + from 0. */ + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so != -1) + { +#ifdef RE_ENABLE_I18N + if (BE (mctx.input.offsets_needed != 0, 0)) + { + pmatch[reg_idx].rm_so = + (pmatch[reg_idx].rm_so == mctx.input.valid_len + ? mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_so]); + pmatch[reg_idx].rm_eo = + (pmatch[reg_idx].rm_eo == mctx.input.valid_len + ? 
mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_eo]); + } +#else + assert (mctx.input.offsets_needed == 0); +#endif + pmatch[reg_idx].rm_so += match_first; + pmatch[reg_idx].rm_eo += match_first; + } + for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) + { + pmatch[nmatch + reg_idx].rm_so = -1; + pmatch[nmatch + reg_idx].rm_eo = -1; + } + + if (dfa->subexp_map) + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } + } + + free_return: + re_free (mctx.state_log); + if (dfa->nbackref) + match_ctx_free (&mctx); + re_string_destruct (&mctx.input); + return err; +} + +static reg_errcode_t +prune_impossible_nodes (re_match_context_t *mctx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int halt_node, match_last; + reg_errcode_t ret; + re_dfastate_t **sifted_states; + re_dfastate_t **lim_states = NULL; + re_sift_context_t sctx; +#ifdef DEBUG + assert (mctx->state_log != NULL); +#endif + match_last = mctx->match_last; + halt_node = mctx->last_node; + + /* Avoid overflow. 
*/ + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)) + return REG_ESPACE; + + sifted_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (sifted_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + if (dfa->nbackref) + { + lim_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (lim_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + while (1) + { + memset (lim_states, '\0', + sizeof (re_dfastate_t *) * (match_last + 1)); + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, + match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] != NULL || lim_states[0] != NULL) + break; + do + { + --match_last; + if (match_last < 0) + { + ret = REG_NOMATCH; + goto free_return; + } + } while (mctx->state_log[match_last] == NULL + || !mctx->state_log[match_last]->halt); + halt_node = check_halt_state_context (mctx, + mctx->state_log[match_last], + match_last); + } + ret = merge_state_array (dfa, sifted_states, lim_states, + match_last + 1); + re_free (lim_states); + lim_states = NULL; + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + } + else + { + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] == NULL) + { + ret = REG_NOMATCH; + goto free_return; + } + } + re_free (mctx->state_log); + mctx->state_log = sifted_states; + sifted_states = NULL; + mctx->last_node = halt_node; + mctx->match_last = match_last; + ret = REG_NOERROR; + free_return: + re_free (sifted_states); + re_free (lim_states); + return ret; +} + +/* Acquire an initial state and return it. + We must select appropriate initial state depending on the context, + since initial states may have constraints like "\<", "^", etc.. 
*/ + +static inline re_dfastate_t * +__attribute ((always_inline)) internal_function +acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, + int idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + if (dfa->init_state->has_constraint) + { + unsigned int context; + context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return dfa->init_state_word; + else if (IS_ORDINARY_CONTEXT (context)) + return dfa->init_state; + else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_begbuf; + else if (IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_nl; + else if (IS_BEGBUF_CONTEXT (context)) + { + /* It is relatively rare case, then calculate on demand. */ + return re_acquire_state_context (err, dfa, + dfa->init_state->entrance_nodes, + context); + } + else + /* Must not happen? */ + return dfa->init_state; + } + else + return dfa->init_state; +} + +/* Check whether the regular expression match input string INPUT or not, + and return the index where the matching end, return -1 if not match, + or return -2 in case of an error. + FL_LONGEST_MATCH means we want the POSIX longest matching. + If P_MATCH_FIRST is not NULL, and the match fails, it is set to the + next place where we may want to try matching. + Note that the matcher assume that the maching starts from the current + index of the buffer. */ + +static int +internal_function +check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int match = 0; + int match_last = -1; + int cur_str_idx = re_string_cur_idx (&mctx->input); + re_dfastate_t *cur_state; + int at_init_state = p_match_first != NULL; + int next_start_idx = cur_str_idx; + + err = REG_NOERROR; + cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); + /* An initial state must not be NULL (invalid). 
*/ + if (BE (cur_state == NULL, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + + if (mctx->state_log != NULL) + { + mctx->state_log[cur_str_idx] = cur_state; + + /* Check OP_OPEN_SUBEXP in the initial state in case that we use them + later. E.g. Processing back references. */ + if (BE (dfa->nbackref, 0)) + { + at_init_state = 0; + err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (cur_state->has_backref) + { + err = transit_state_bkref (mctx, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + + /* If the RE accepts NULL string. */ + if (BE (cur_state->halt, 0)) + { + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, cur_str_idx)) + { + if (!fl_longest_match) + return cur_str_idx; + else + { + match_last = cur_str_idx; + match = 1; + } + } + } + + while (!re_string_eoi (&mctx->input)) + { + re_dfastate_t *old_state = cur_state; + int next_char_idx = re_string_cur_idx (&mctx->input) + 1; + + if (BE (next_char_idx >= mctx->input.bufs_len, 0) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + } + + cur_state = transit_state (&err, mctx, cur_state); + if (mctx->state_log != NULL) + cur_state = merge_state_with_log (&err, mctx, cur_state); + + if (cur_state == NULL) + { + /* Reached the invalid state or an error. Try to recover a valid + state using the state log, if available and if we have not + already found a valid (even if not the longest) match. 
*/ + if (BE (err != REG_NOERROR, 0)) + return -2; + + if (mctx->state_log == NULL + || (match && !fl_longest_match) + || (cur_state = find_recover_state (&err, mctx)) == NULL) + break; + } + + if (BE (at_init_state, 0)) + { + if (old_state == cur_state) + next_start_idx = next_char_idx; + else + at_init_state = 0; + } + + if (cur_state->halt) + { + /* Reached a halt state. + Check the halt state can satisfy the current context. */ + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, + re_string_cur_idx (&mctx->input))) + { + /* We found an appropriate halt state. */ + match_last = re_string_cur_idx (&mctx->input); + match = 1; + + /* We found a match, do not modify match_first below. */ + p_match_first = NULL; + if (!fl_longest_match) + break; + } + } + } + + if (p_match_first) + *p_match_first += next_start_idx; + + return match_last; +} + +/* Check NODE match the current context. */ + +static int +internal_function +check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) +{ + re_token_type_t type = dfa->nodes[node].type; + unsigned int constraint = dfa->nodes[node].constraint; + if (type != END_OF_RE) + return 0; + if (!constraint) + return 1; + if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) + return 0; + return 1; +} + +/* Check the halt state STATE match the current context. + Return 0 if not match, if the node, STATE has, is a halt node and + match the context, return the node. 
*/ + +static int +internal_function +check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) +{ + int i; + unsigned int context; +#ifdef DEBUG + assert (state->halt); +#endif + context = re_string_context_at (&mctx->input, idx, mctx->eflags); + for (i = 0; i < state->nodes.nelem; ++i) + if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) + return state->nodes.elems[i]; + return 0; +} + +/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA + corresponding to the DFA). + Return the destination node, and update EPS_VIA_NODES, return -1 in case + of errors. */ + +static int +internal_function +proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, + int *pidx, int node, re_node_set *eps_via_nodes, + struct re_fail_stack_t *fs) +{ + const re_dfa_t *const dfa = mctx->dfa; + int i, err; + if (IS_EPSILON_NODE (dfa->nodes[node].type)) + { + re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; + re_node_set *edests = &dfa->edests[node]; + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + /* Pick up a valid destination, or return -1 if none is found. */ + for (dest_node = -1, i = 0; i < edests->nelem; ++i) + { + int candidate = edests->elems[i]; + if (!re_node_set_contains (cur_nodes, candidate)) + continue; + if (dest_node == -1) + dest_node = candidate; + + else + { + /* In order to avoid infinite loop like "(a*)*", return the second + epsilon-transition if the first was already considered. */ + if (re_node_set_contains (eps_via_nodes, dest_node)) + return candidate; + + /* Otherwise, push the second epsilon-transition on the fail stack. */ + else if (fs != NULL + && push_fail_stack (fs, *pidx, candidate, nregs, regs, + eps_via_nodes)) + return -2; + + /* We know we are going to exit. 
*/ + break; + } + } + return dest_node; + } + else + { + int naccepted = 0; + re_token_type_t type = dfa->nodes[node].type; + +#ifdef RE_ENABLE_I18N + if (dfa->nodes[node].accept_mb) + naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); + else +#endif /* RE_ENABLE_I18N */ + if (type == OP_BACK_REF) + { + int subexp_idx = dfa->nodes[node].opr.idx + 1; + naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; + if (fs != NULL) + { + if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) + return -1; + else if (naccepted) + { + char *buf = (char *) re_string_get_buffer (&mctx->input); + if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, + naccepted) != 0) + return -1; + } + } + + if (naccepted == 0) + { + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + dest_node = dfa->edests[node].elems[0]; + if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node)) + return dest_node; + } + } + + if (naccepted != 0 + || check_node_accept (mctx, dfa->nodes + node, *pidx)) + { + int dest_node = dfa->nexts[node]; + *pidx = (naccepted == 0) ? 
*pidx + 1 : *pidx + naccepted; + if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL + || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node))) + return -1; + re_node_set_empty (eps_via_nodes); + return dest_node; + } + } + return -1; +} + +static reg_errcode_t +internal_function +push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, + int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) +{ + reg_errcode_t err; + int num = fs->num++; + if (fs->num == fs->alloc) + { + struct re_fail_stack_ent_t *new_array; + new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) + * fs->alloc * 2)); + if (new_array == NULL) + return REG_ESPACE; + fs->alloc *= 2; + fs->stack = new_array; + } + fs->stack[num].idx = str_idx; + fs->stack[num].node = dest_node; + fs->stack[num].regs = re_malloc (regmatch_t, nregs); + if (fs->stack[num].regs == NULL) + return REG_ESPACE; + memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); + err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); + return err; +} + +static int +internal_function +pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, + regmatch_t *regs, re_node_set *eps_via_nodes) +{ + int num = --fs->num; + assert (num >= 0); + *pidx = fs->stack[num].idx; + memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); + re_node_set_free (eps_via_nodes); + re_free (fs->stack[num].regs); + *eps_via_nodes = fs->stack[num].eps_via_nodes; + return fs->stack[num].node; +} + +/* Set the positions where the subexpressions are starts/ends to registers + PMATCH. + Note: We assume that pmatch[0] is already set, and + pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. 
*/ + +static reg_errcode_t +internal_function +set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, + regmatch_t *pmatch, int fl_backtrack) +{ + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int idx, cur_node; + re_node_set eps_via_nodes; + struct re_fail_stack_t *fs; + struct re_fail_stack_t fs_body = { 0, 2, NULL }; + regmatch_t *prev_idx_match; + int prev_idx_match_malloced = 0; + +#ifdef DEBUG + assert (nmatch > 1); + assert (mctx->state_log != NULL); +#endif + if (fl_backtrack) + { + fs = &fs_body; + fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); + if (fs->stack == NULL) + return REG_ESPACE; + } + else + fs = NULL; + + cur_node = dfa->init_node; + re_node_set_init_empty (&eps_via_nodes); + +#ifdef HAVE_ALLOCA + if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) + prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); + else +#endif + { + prev_idx_match = re_malloc (regmatch_t, nmatch); + if (prev_idx_match == NULL) + { + free_fail_stack_return (fs); + return REG_ESPACE; + } + prev_idx_match_malloced = 1; + } + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + + for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) + { + update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); + + if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) + { + int reg_idx; + if (fs) + { + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) + break; + if (reg_idx == nmatch) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); + } + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + } + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOERROR; + } + } + + /* Proceed to next node. 
*/ + cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, + &eps_via_nodes, fs); + + if (BE (cur_node < 0, 0)) + { + if (BE (cur_node == -2, 0)) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + free_fail_stack_return (fs); + return REG_ESPACE; + } + if (fs) + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOMATCH; + } + } + } + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); +} + +static reg_errcode_t +internal_function +free_fail_stack_return (struct re_fail_stack_t *fs) +{ + if (fs) + { + int fs_idx; + for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) + { + re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); + re_free (fs->stack[fs_idx].regs); + } + re_free (fs->stack); + } + return REG_NOERROR; +} + +static void +internal_function +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) +{ + int type = dfa->nodes[cur_node].type; + if (type == OP_OPEN_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + + /* We are at the first node of this sub expression. */ + if (reg_num < nmatch) + { + pmatch[reg_num].rm_so = cur_idx; + pmatch[reg_num].rm_eo = -1; + } + } + else if (type == OP_CLOSE_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + if (reg_num < nmatch) + { + /* We are at the last node of this sub expression. */ + if (pmatch[reg_num].rm_so < cur_idx) + { + pmatch[reg_num].rm_eo = cur_idx; + /* This is a non-empty match or we are not inside an optional + subexpression. Accept this right away. 
*/ + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + } + else + { + if (dfa->nodes[cur_node].opt_subexp + && prev_idx_match[reg_num].rm_so != -1) + /* We transited through an empty match for an optional + subexpression, like (a?)*, and this is not the subexp's + first match. Copy back the old content of the registers + so that matches of an inner subexpression are undone as + well, like in ((a?))*. */ + memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); + else + /* We completed a subexpression, but it may be part of + an optional one, so do not update PREV_IDX_MATCH. */ + pmatch[reg_num].rm_eo = cur_idx; + } + } + } +} + +/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 + and sift the nodes in each states according to the following rules. + Updated state_log will be wrote to STATE_LOG. + + Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + 1. When STR_IDX == MATCH_LAST(the last index in the state_log): + If `a' isn't the LAST_NODE and `a' can't epsilon transit to + the LAST_NODE, we throw away the node `a'. + 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts + string `s' and transit to `b': + i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw + away the node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is + thrown away, we throw away the node `a'. + 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': + i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the + node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, + we throw away the node `a'. 
*/ + +#define STATE_NODE_CONTAINS(state,node) \ + ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) + +static reg_errcode_t +internal_function +sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) +{ + reg_errcode_t err; + int null_cnt = 0; + int str_idx = sctx->last_str_idx; + re_node_set cur_dest; + +#ifdef DEBUG + assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); +#endif + + /* Build sifted state_log[str_idx]. It has the nodes which can epsilon + transit to the last_node and the last_node itself. */ + err = re_node_set_init_1 (&cur_dest, sctx->last_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* Then check each states in the state_log. */ + while (str_idx > 0) + { + /* Update counters. */ + null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; + if (null_cnt > mctx->max_mb_elem_len) + { + memset (sctx->sifted_states, '\0', + sizeof (re_dfastate_t *) * str_idx); + re_node_set_free (&cur_dest); + return REG_NOERROR; + } + re_node_set_empty (&cur_dest); + --str_idx; + + if (mctx->state_log[str_idx]) + { + err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* Add all the nodes which satisfy the following conditions: + - It can epsilon transit to a node in CUR_DEST. + - It is in CUR_SRC. + And update state_log. 
*/ + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + err = REG_NOERROR; + free_return: + re_node_set_free (&cur_dest); + return err; +} + +static reg_errcode_t +internal_function +build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, re_node_set *cur_dest) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; + int i; + + /* Then build the next sifted state. + We build the next sifted state on `cur_dest', and update + `sifted_states[str_idx]' with `cur_dest'. + Note: + `cur_dest' is the sifted state from `state_log[str_idx + 1]'. + `cur_src' points the node_set of the old `state_log[str_idx]' + (with the epsilon nodes pre-filtered out). */ + for (i = 0; i < cur_src->nelem; i++) + { + int prev_node = cur_src->elems[i]; + int naccepted = 0; + int ret; + +#ifdef DEBUG + re_token_type_t type = dfa->nodes[prev_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[prev_node].accept_mb) + naccepted = sift_states_iter_mb (mctx, sctx, prev_node, + str_idx, sctx->last_str_idx); +#endif /* RE_ENABLE_I18N */ + + /* We don't check backreferences here. + See update_cur_sifted_state(). */ + if (!naccepted + && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) + && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], + dfa->nexts[prev_node])) + naccepted = 1; + + if (naccepted == 0) + continue; + + if (sctx->limits.nelem) + { + int to_idx = str_idx + naccepted; + if (check_dst_limits (mctx, &sctx->limits, + dfa->nexts[prev_node], to_idx, + prev_node, str_idx)) + continue; + } + ret = re_node_set_insert (cur_dest, prev_node); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + + return REG_NOERROR; +} + +/* Helper functions. 
*/ + +static reg_errcode_t +internal_function +clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) +{ + int top = mctx->state_log_top; + + if (next_state_log_idx >= mctx->input.bufs_len + || (next_state_log_idx >= mctx->input.valid_len + && mctx->input.valid_len < mctx->input.len)) + { + reg_errcode_t err; + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (top < next_state_log_idx) + { + memset (mctx->state_log + top + 1, '\0', + sizeof (re_dfastate_t *) * (next_state_log_idx - top)); + mctx->state_log_top = next_state_log_idx; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, int num) +{ + int st_idx; + reg_errcode_t err; + for (st_idx = 0; st_idx < num; ++st_idx) + { + if (dst[st_idx] == NULL) + dst[st_idx] = src[st_idx]; + else if (src[st_idx] != NULL) + { + re_node_set merged_set; + err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, + &src[st_idx]->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); + re_node_set_free (&merged_set); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *dest_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + const re_node_set *candidates; + candidates = ((mctx->state_log[str_idx] == NULL) ? NULL + : &mctx->state_log[str_idx]->nodes); + + if (dest_nodes->nelem == 0) + sctx->sifted_states[str_idx] = NULL; + else + { + if (candidates) + { + /* At first, add the nodes which can epsilon transit to a node in + DEST_NODE. 
*/ + err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Then, check the limitations in the current sift_context. */ + if (sctx->limits.nelem) + { + err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, + mctx->bkref_ents, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + + sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (candidates && mctx->state_log[str_idx]->has_backref) + { + err = sift_states_bkref (mctx, sctx, str_idx, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + reg_errcode_t err = REG_NOERROR; + int i; + + re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (!state->inveclosure.alloc) + { + err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + for (i = 0; i < dest_nodes->nelem; i++) + { + err = re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + } + } + return re_node_set_add_intersect (dest_nodes, candidates, + &state->inveclosure); +} + +static reg_errcode_t +internal_function +sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + int ecl_idx; + reg_errcode_t err; + re_node_set *inv_eclosure = dfa->inveclosures + node; + re_node_set except_nodes; + re_node_set_init_empty (&except_nodes); + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (cur_node == node) + continue; + if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) + { + int edst1 = 
dfa->edests[cur_node].elems[0]; + int edst2 = ((dfa->edests[cur_node].nelem > 1) + ? dfa->edests[cur_node].elems[1] : -1); + if ((!re_node_set_contains (inv_eclosure, edst1) + && re_node_set_contains (dest_nodes, edst1)) + || (edst2 > 0 + && !re_node_set_contains (inv_eclosure, edst2) + && re_node_set_contains (dest_nodes, edst2))) + { + err = re_node_set_add_intersect (&except_nodes, candidates, + dfa->inveclosures + cur_node); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&except_nodes); + return err; + } + } + } + } + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (!re_node_set_contains (&except_nodes, cur_node)) + { + int idx = re_node_set_contains (dest_nodes, cur_node) - 1; + re_node_set_remove_at (dest_nodes, idx); + } + } + re_node_set_free (&except_nodes); + return REG_NOERROR; +} + +static int +internal_function +check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, + int dst_node, int dst_idx, int src_node, int src_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int lim_idx, src_pos, dst_pos; + + int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); + int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = mctx->bkref_ents + limits->elems[lim_idx]; + subexp_idx = dfa->nodes[ent->node].opr.idx; + + dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, dst_node, dst_idx, + dst_bkref_idx); + src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, src_node, src_idx, + src_bkref_idx); + + /* In case of: + <src> <dst> ( <subexp> ) + ( <subexp> ) <src> <dst> + ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */ + if (src_pos == dst_pos) + continue; /* This is unrelated limitation. 
*/ + else + return 1; + } + return 0; +} + +static int +internal_function +check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, + int subexp_idx, int from_node, int bkref_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *eclosures = dfa->eclosures + from_node; + int node_idx; + + /* Else, we are on the boundary: examine the nodes on the epsilon + closure. */ + for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) + { + int node = eclosures->elems[node_idx]; + switch (dfa->nodes[node].type) + { + case OP_BACK_REF: + if (bkref_idx != -1) + { + struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; + do + { + int dst, cpos; + + if (ent->node != node) + continue; + + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) + continue; + + /* Recurse trying to reach the OP_OPEN_SUBEXP and + OP_CLOSE_SUBEXP cases below. But, if the + destination node is the same node as the source + node, don't recurse because it would cause an + infinite loop: a regex that exhibits this behavior + is ()\1*\1* */ + dst = dfa->edests[node].elems[0]; + if (dst == from_node) + { + if (boundaries & 1) + return -1; + else /* if (boundaries & 2) */ + return 0; + } + + cpos = + check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + dst, bkref_idx); + if (cpos == -1 /* && (boundaries & 1) */) + return -1; + if (cpos == 0 && (boundaries & 2)) + return 0; + + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); + } + while (ent++->more); + } + break; + + case OP_OPEN_SUBEXP: + if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) + return -1; + break; + + case OP_CLOSE_SUBEXP: + if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) + return 0; + break; + + default: + break; + } + } + + return (boundaries & 2) ? 
1 : 0; +} + +static int +internal_function +check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, + int subexp_idx, int from_node, int str_idx, + int bkref_idx) +{ + struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; + int boundaries; + + /* If we are outside the range of the subexpression, return -1 or 1. */ + if (str_idx < lim->subexp_from) + return -1; + + if (lim->subexp_to < str_idx) + return 1; + + /* If we are within the subexpression, return 0. */ + boundaries = (str_idx == lim->subexp_from); + boundaries |= (str_idx == lim->subexp_to) << 1; + if (boundaries == 0) + return 0; + + /* Else, examine epsilon closure. */ + return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + from_node, bkref_idx); +} + +/* Check the limitations of sub expressions LIMITS, and remove the nodes + which are against limitations from DEST_NODES. */ + +static reg_errcode_t +internal_function +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates, re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, int str_idx) +{ + reg_errcode_t err; + int node_idx, lim_idx; + + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = bkref_ents + limits->elems[lim_idx]; + + if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) + continue; /* This is unrelated limitation. */ + + subexp_idx = dfa->nodes[ent->node].opr.idx; + if (ent->subexp_to == str_idx) + { + int ops_node = -1; + int cls_node = -1; + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_OPEN_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + ops_node = node; + else if (type == OP_CLOSE_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + cls_node = node; + } + + /* Check the limitation of the open subexpression. 
*/ + /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ + if (ops_node >= 0) + { + err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Check the limitation of the close subexpression. */ + if (cls_node >= 0) + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + if (!re_node_set_contains (dfa->inveclosures + node, + cls_node) + && !re_node_set_contains (dfa->eclosures + node, + cls_node)) + { + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + --node_idx; + } + } + } + else /* (ent->subexp_to != str_idx) */ + { + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) + { + if (subexp_idx != dfa->nodes[node].opr.idx) + continue; + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int node_idx, node; + re_sift_context_t local_sctx; + int first_idx = search_cur_bkref_entry (mctx, str_idx); + + if (first_idx == -1) + return REG_NOERROR; + + local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. 
*/ + + for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) + { + int enabled_idx; + re_token_type_t type; + struct re_backref_cache_entry *entry; + node = candidates->elems[node_idx]; + type = dfa->nodes[node].type; + /* Avoid infinite loop for the REs like "()\1+". */ + if (node == sctx->last_node && str_idx == sctx->last_str_idx) + continue; + if (type != OP_BACK_REF) + continue; + + entry = mctx->bkref_ents + first_idx; + enabled_idx = first_idx; + do + { + int subexp_len; + int to_idx; + int dst_node; + int ret; + re_dfastate_t *cur_state; + + if (entry->node != node) + continue; + subexp_len = entry->subexp_to - entry->subexp_from; + to_idx = str_idx + subexp_len; + dst_node = (subexp_len ? dfa->nexts[node] + : dfa->edests[node].elems[0]); + + if (to_idx > sctx->last_str_idx + || sctx->sifted_states[to_idx] == NULL + || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) + || check_dst_limits (mctx, &sctx->limits, node, + str_idx, dst_node, to_idx)) + continue; + + if (local_sctx.sifted_states == NULL) + { + local_sctx = *sctx; + err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.last_node = node; + local_sctx.last_str_idx = str_idx; + ret = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (ret < 0, 0)) + { + err = REG_ESPACE; + goto free_return; + } + cur_state = local_sctx.sifted_states[str_idx]; + err = sift_states_backward (mctx, &local_sctx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + if (sctx->limited_states != NULL) + { + err = merge_state_array (dfa, sctx->limited_states, + local_sctx.sifted_states, + str_idx + 1); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.sifted_states[str_idx] = cur_state; + re_node_set_remove (&local_sctx.limits, enabled_idx); + + /* mctx->bkref_ents may have changed, reload the pointer. 
*/ + entry = mctx->bkref_ents + enabled_idx; + } + while (enabled_idx++, entry++->more); + } + err = REG_NOERROR; + free_return: + if (local_sctx.sifted_states != NULL) + { + re_node_set_free (&local_sctx.limits); + } + + return err; +} + + +#ifdef RE_ENABLE_I18N +static int +internal_function +sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int naccepted; + /* Check the node can accept `multi byte'. */ + naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); + if (naccepted > 0 && str_idx + naccepted <= max_str_idx && + !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], + dfa->nexts[node_idx])) + /* The node can't accept the `multi byte', or the + destination was already thrown away, then the node + could't accept the current input `multi byte'. */ + naccepted = 0; + /* Otherwise, it is sure that the node could accept + `naccepted' bytes input. */ + return naccepted; +} +#endif /* RE_ENABLE_I18N */ + + +/* Functions for state transition. */ + +/* Return the next state to which the current state STATE will transit by + accepting the current input byte, and update STATE_LOG if necessary. + If STATE can accept a multibyte char/collating element/back reference + update the destination of STATE_LOG. */ + +static re_dfastate_t * +internal_function +transit_state (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + re_dfastate_t **trtable; + unsigned char ch; + +#ifdef RE_ENABLE_I18N + /* If the current state can accept multibyte. */ + if (BE (state->accept_mb, 0)) + { + *err = transit_state_mb (mctx, state); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } +#endif /* RE_ENABLE_I18N */ + + /* Then decide the next state with the single byte. 
*/ +#if 0 + if (0) + /* don't use transition table */ + return transit_state_sb (err, mctx, state); +#endif + + /* Use transition table */ + ch = re_string_fetch_byte (&mctx->input); + for (;;) + { + trtable = state->trtable; + if (BE (trtable != NULL, 1)) + return trtable[ch]; + + trtable = state->word_trtable; + if (BE (trtable != NULL, 1)) + { + unsigned int context; + context + = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return trtable[ch + SBC_MAX]; + else + return trtable[ch]; + } + + if (!build_trtable (mctx->dfa, state)) + { + *err = REG_ESPACE; + return NULL; + } + + /* Retry, we now have a transition table. */ + } +} + +/* Update the state_log if we need */ +re_dfastate_t * +internal_function +merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *next_state) +{ + const re_dfa_t *const dfa = mctx->dfa; + int cur_idx = re_string_cur_idx (&mctx->input); + + if (cur_idx > mctx->state_log_top) + { + mctx->state_log[cur_idx] = next_state; + mctx->state_log_top = cur_idx; + } + else if (mctx->state_log[cur_idx] == 0) + { + mctx->state_log[cur_idx] = next_state; + } + else + { + re_dfastate_t *pstate; + unsigned int context; + re_node_set next_nodes, *log_nodes, *table_nodes = NULL; + /* If (state_log[cur_idx] != 0), it implies that cur_idx is + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ + pstate = mctx->state_log[cur_idx]; + log_nodes = pstate->entrance_nodes; + if (next_state != NULL) + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, + log_nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + else + next_nodes = *log_nodes; + /* Note: We already add the nodes of the initial state, + then we don't need to add them here. 
*/ + + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + next_state = mctx->state_log[cur_idx] + = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + + if (table_nodes != NULL) + re_node_set_free (&next_nodes); + } + + if (BE (dfa->nbackref, 0) && next_state != NULL) + { + /* Check OP_OPEN_SUBEXP in the current state in case that we use them + later. We must check them here, since the back references in the + next state might use them. */ + *err = check_subexp_matching_top (mctx, &next_state->nodes, + cur_idx); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + + /* If the next state has back references. */ + if (next_state->has_backref) + { + *err = transit_state_bkref (mctx, &next_state->nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + next_state = mctx->state_log[cur_idx]; + } + } + + return next_state; +} + +/* Skip bytes in the input that correspond to part of a + multi-byte match, then look in the log for a state + from which to restart matching. */ +re_dfastate_t * +internal_function +find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) +{ + re_dfastate_t *cur_state; + do + { + int max = mctx->state_log_top; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + do + { + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); + } + while (mctx->state_log[cur_str_idx] == NULL); + + cur_state = merge_state_with_log (err, mctx, NULL); + } + while (*err == REG_NOERROR && cur_state == NULL); + return cur_state; +} + +/* Helper functions for transit_state. */ + +/* From the node set CUR_NODES, pick up the nodes whose types are + OP_OPEN_SUBEXP and which have corresponding back references in the regular + expression. And register them to use them later for evaluating the + correspoding back references. 
*/ + +static reg_errcode_t +internal_function +check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, + int str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int node_idx; + reg_errcode_t err; + + /* TODO: This isn't efficient. + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) + { + int node = cur_nodes->elems[node_idx]; + if (dfa->nodes[node].type == OP_OPEN_SUBEXP + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS + && (dfa->used_bkref_map + & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) + { + err = match_ctx_add_subtop (mctx, node, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +#if 0 +/* Return the next state to which the current state STATE will transit by + accepting the current input byte. */ + +static re_dfastate_t * +transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + const re_dfa_t *const dfa = mctx->dfa; + re_node_set next_nodes; + re_dfastate_t *next_state; + int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); + unsigned int context; + + *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) + { + int cur_node = state->nodes.elems[node_cnt]; + if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) + { + *err = re_node_set_merge (&next_nodes, + dfa->eclosures + dfa->nexts[cur_node]); + if (BE (*err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return NULL; + } + } + } + context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); + next_state = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR 
is already set. */ + + re_node_set_free (&next_nodes); + re_string_skip_bytes (&mctx->input, 1); + return next_state; +} +#endif + +#ifdef RE_ENABLE_I18N +static reg_errcode_t +internal_function +transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + + for (i = 0; i < pstate->nodes.nelem; ++i) + { + re_node_set dest_nodes, *new_nodes; + int cur_node_idx = pstate->nodes.elems[i]; + int naccepted, dest_idx; + unsigned int context; + re_dfastate_t *dest_state; + + if (!dfa->nodes[cur_node_idx].accept_mb) + continue; + + if (dfa->nodes[cur_node_idx].constraint) + { + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input), + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, + context)) + continue; + } + + /* How many bytes the node can accept? */ + naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, + re_string_cur_idx (&mctx->input)); + if (naccepted == 0) + continue; + + /* The node can accepts `naccepted' bytes. */ + dest_idx = re_string_cur_idx (&mctx->input) + naccepted; + mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? 
naccepted + : mctx->max_mb_elem_len); + err = clean_state_log_if_needed (mctx, dest_idx); + if (BE (err != REG_NOERROR, 0)) + return err; +#ifdef DEBUG + assert (dfa->nexts[cur_node_idx] != -1); +#endif + new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; + + dest_state = mctx->state_log[dest_idx]; + if (dest_state == NULL) + dest_nodes = *new_nodes; + else + { + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, new_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + context = re_string_context_at (&mctx->input, dest_idx - 1, + mctx->eflags); + mctx->state_log[dest_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + if (dest_state != NULL) + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} +#endif /* RE_ENABLE_I18N */ + +static reg_errcode_t +internal_function +transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + for (i = 0; i < nodes->nelem; ++i) + { + int dest_str_idx, prev_nelem, bkc_idx; + int node_idx = nodes->elems[i]; + unsigned int context; + const re_token_t *node = dfa->nodes + node_idx; + re_node_set *new_dest_nodes; + + /* Check whether `node' is a backreference or not. */ + if (node->type != OP_BACK_REF) + continue; + + if (node->constraint) + { + context = re_string_context_at (&mctx->input, cur_str_idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + continue; + } + + /* `node' is a backreference. + Check the substring which the substring matched. */ + bkc_idx = mctx->nbkref_ents; + err = get_subexp (mctx, node_idx, cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* And add the epsilon closures (which is `new_dest_nodes') of + the backreference to appropriate state_log. 
*/ +#ifdef DEBUG + assert (dfa->nexts[node_idx] != -1); +#endif + for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) + { + int subexp_len; + re_dfastate_t *dest_state; + struct re_backref_cache_entry *bkref_ent; + bkref_ent = mctx->bkref_ents + bkc_idx; + if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) + continue; + subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; + new_dest_nodes = (subexp_len == 0 + ? dfa->eclosures + dfa->edests[node_idx].elems[0] + : dfa->eclosures + dfa->nexts[node_idx]); + dest_str_idx = (cur_str_idx + bkref_ent->subexp_to + - bkref_ent->subexp_from); + context = re_string_context_at (&mctx->input, dest_str_idx - 1, + mctx->eflags); + dest_state = mctx->state_log[dest_str_idx]; + prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 + : mctx->state_log[cur_str_idx]->nodes.nelem); + /* Add `new_dest_node' to state_log. */ + if (dest_state == NULL) + { + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, new_dest_nodes, + context); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + else + { + re_node_set dest_nodes; + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, + new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&dest_nodes); + goto free_return; + } + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + /* We need to check recursively if the backreference can epsilon + transit. 
*/ + if (subexp_len == 0 + && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) + { + err = check_subexp_matching_top (mctx, new_dest_nodes, + cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + err = transit_state_bkref (mctx, new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + } + } + err = REG_NOERROR; + free_return: + return err; +} + +/* Enumerate all the candidates which the backreference BKREF_NODE can match + at BKREF_STR_IDX, and register them by match_ctx_add_entry(). + Note that we might collect inappropriate candidates here. + However, the cost of checking them strictly here is too high, then we + delay these checking for prune_impossible_nodes(). */ + +static reg_errcode_t +internal_function +get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int subexp_num, sub_top_idx; + const char *buf = (const char *) re_string_get_buffer (&mctx->input); + /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ + int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); + if (cache_idx != -1) + { + const struct re_backref_cache_entry *entry + = mctx->bkref_ents + cache_idx; + do + if (entry->node == bkref_node) + return REG_NOERROR; /* We already checked it. */ + while (entry++->more); + } + + subexp_num = dfa->nodes[bkref_node].opr.idx; + + /* For each sub expression */ + for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) + { + reg_errcode_t err; + re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; + re_sub_match_last_t *sub_last; + int sub_last_idx, sl_str, bkref_str_off; + + if (dfa->nodes[sub_top->node].opr.idx != subexp_num) + continue; /* It isn't related. */ + + sl_str = sub_top->str_idx; + bkref_str_off = bkref_str_idx; + /* At first, check the last node of sub expressions we already + evaluated. 
*/ + for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) + { + int sl_str_diff; + sub_last = sub_top->lasts[sub_last_idx]; + sl_str_diff = sub_last->str_idx - sl_str; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_diff > 0) + { + if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) + { + /* Not enough chars for a successful match. */ + if (bkref_str_off + sl_str_diff > mctx->input.len) + break; + + err = clean_state_log_if_needed (mctx, + bkref_str_off + + sl_str_diff); + if (BE (err != REG_NOERROR, 0)) + return err; + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) + /* We don't need to search this sub expression any more. */ + break; + } + bkref_str_off += sl_str_diff; + sl_str += sl_str_diff; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + + /* Reload buf, since the preceding call might have reallocated + the buffer. */ + buf = (const char *) re_string_get_buffer (&mctx->input); + + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (sub_last_idx < sub_top->nlasts) + continue; + if (sub_last_idx > 0) + ++sl_str; + /* Then, search for the other last nodes of the sub expression. */ + for (; sl_str <= bkref_str_idx; ++sl_str) + { + int cls_node, sl_str_off; + const re_node_set *nodes; + sl_str_off = sl_str - sub_top->str_idx; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_off > 0) + { + if (BE (bkref_str_off >= mctx->input.valid_len, 0)) + { + /* If we are at the end of the input, we cannot match. 
*/ + if (bkref_str_off >= mctx->input.len) + break; + + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (buf [bkref_str_off++] != buf[sl_str - 1]) + break; /* We don't need to search this sub expression + any more. */ + } + if (mctx->state_log[sl_str] == NULL) + continue; + /* Does this state have a ')' of the sub expression? */ + nodes = &mctx->state_log[sl_str]->nodes; + cls_node = find_subexp_node (dfa, nodes, subexp_num, + OP_CLOSE_SUBEXP); + if (cls_node == -1) + continue; /* No. */ + if (sub_top->path == NULL) + { + sub_top->path = calloc (sizeof (state_array_t), + sl_str - sub_top->str_idx + 1); + if (sub_top->path == NULL) + return REG_ESPACE; + } + /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node + in the current context? */ + err = check_arrival (mctx, sub_top->path, sub_top->node, + sub_top->str_idx, cls_node, sl_str, + OP_CLOSE_SUBEXP); + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); + if (BE (sub_last == NULL, 0)) + return REG_ESPACE; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + if (err == REG_NOMATCH) + continue; + } + } + return REG_NOERROR; +} + +/* Helper functions for get_subexp(). */ + +/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. + If it can arrive, register the sub expression expressed with SUB_TOP + and SUB_LAST. */ + +static reg_errcode_t +internal_function +get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, int bkref_node, int bkref_str) +{ + reg_errcode_t err; + int to_idx; + /* Can the subexpression arrive the back reference? 
*/ + err = check_arrival (mctx, &sub_last->path, sub_last->node, + sub_last->str_idx, bkref_node, bkref_str, + OP_OPEN_SUBEXP); + if (err != REG_NOERROR) + return err; + err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, + sub_last->str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; + return clean_state_log_if_needed (mctx, to_idx); +} + +/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. + Search '(' if FL_OPEN, or search ')' otherwise. + TODO: This function isn't efficient... + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + +static int +internal_function +find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) +{ + int cls_idx; + for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) + { + int cls_node = nodes->elems[cls_idx]; + const re_token_t *node = dfa->nodes + cls_node; + if (node->type == type + && node->opr.idx == subexp_idx) + return cls_node; + } + return -1; +} + +/* Check whether the node TOP_NODE at TOP_STR can arrive to the node + LAST_NODE at LAST_STR. We record the path onto PATH since it will be + heavily reused. + Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ + +static reg_errcode_t +internal_function +check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, + int top_str, int last_node, int last_str, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + int subexp_num, backup_cur_idx, str_idx, null_cnt; + re_dfastate_t *cur_state = NULL; + re_node_set *cur_nodes, next_nodes; + re_dfastate_t **backup_state_log; + unsigned int context; + + subexp_num = dfa->nodes[top_node].opr.idx; + /* Extend the buffer if we need. 
*/ + if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) + { + re_dfastate_t **new_array; + int old_alloc = path->alloc; + path->alloc += last_str + mctx->max_mb_elem_len + 1; + new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); + if (BE (new_array == NULL, 0)) + { + path->alloc = old_alloc; + return REG_ESPACE; + } + path->array = new_array; + memset (new_array + old_alloc, '\0', + sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); + } + + str_idx = path->next_idx ? path->next_idx : top_str; + + /* Temporary modify MCTX. */ + backup_state_log = mctx->state_log; + backup_cur_idx = mctx->input.cur_idx; + mctx->state_log = path->array; + mctx->input.cur_idx = str_idx; + + /* Setup initial node set. */ + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + if (str_idx == top_str) + { + err = re_node_set_init_1 (&next_nodes, top_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + else + { + cur_state = mctx->state_log[str_idx]; + if (cur_state && cur_state->has_backref) + { + err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + re_node_set_init_empty (&next_nodes); + } + if (str_idx == top_str || (cur_state && cur_state->has_backref)) + { + if (next_nodes.nelem) + { + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + } + + for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) + { + re_node_set_empty (&next_nodes); + 
if (mctx->state_log[str_idx + 1]) + { + err = re_node_set_merge (&next_nodes, + &mctx->state_log[str_idx + 1]->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + if (cur_state) + { + err = check_arrival_add_next_nodes (mctx, str_idx, + &cur_state->non_eps_nodes, + &next_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + ++str_idx; + if (next_nodes.nelem) + { + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + null_cnt = cur_state == NULL ? null_cnt + 1 : 0; + } + re_node_set_free (&next_nodes); + cur_nodes = (mctx->state_log[last_str] == NULL ? NULL + : &mctx->state_log[last_str]->nodes); + path->next_idx = str_idx; + + /* Fix MCTX. */ + mctx->state_log = backup_state_log; + mctx->input.cur_idx = backup_cur_idx; + + /* Then check the current node set has the node LAST_NODE. */ + if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) + return REG_NOERROR; + + return REG_NOMATCH; +} + +/* Helper functions for check_arrival. */ + +/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them + to NEXT_NODES. + TODO: This function is similar to the functions transit_state*(), + however this function has many additional works. + Can't we unify them? 
*/ + +static reg_errcode_t +internal_function +check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, + re_node_set *cur_nodes, re_node_set *next_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + int result; + int cur_idx; +#ifdef RE_ENABLE_I18N + reg_errcode_t err = REG_NOERROR; +#endif + re_node_set union_set; + re_node_set_init_empty (&union_set); + for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) + { + int naccepted = 0; + int cur_node = cur_nodes->elems[cur_idx]; +#ifdef DEBUG + re_token_type_t type = dfa->nodes[cur_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[cur_node].accept_mb) + { + naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, + str_idx); + if (naccepted > 1) + { + re_dfastate_t *dest_state; + int next_node = dfa->nexts[cur_node]; + int next_idx = str_idx + naccepted; + dest_state = mctx->state_log[next_idx]; + re_node_set_empty (&union_set); + if (dest_state) + { + err = re_node_set_merge (&union_set, &dest_state->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + result = re_node_set_insert (&union_set, next_node); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + mctx->state_log[next_idx] = re_acquire_state (&err, dfa, + &union_set); + if (BE (mctx->state_log[next_idx] == NULL + && err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + } +#endif /* RE_ENABLE_I18N */ + if (naccepted + || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) + { + result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + } + } + re_node_set_free (&union_set); + return REG_NOERROR; +} + +/* For all the nodes in CUR_NODES, add the epsilon closures of them to + CUR_NODES, however exclude the nodes which are: + - 
inside the sub expression whose number is EX_SUBEXP, if FL_OPEN. + - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. +*/ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, + int ex_subexp, int type) +{ + reg_errcode_t err; + int idx, outside_node; + re_node_set new_nodes; +#ifdef DEBUG + assert (cur_nodes->nelem); +#endif + err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* Create a new node set NEW_NODES with the nodes which are epsilon + closures of the node in CUR_NODES. */ + + for (idx = 0; idx < cur_nodes->nelem; ++idx) + { + int cur_node = cur_nodes->elems[idx]; + const re_node_set *eclosure = dfa->eclosures + cur_node; + outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); + if (outside_node == -1) + { + /* There are no problematic nodes, just merge them. */ + err = re_node_set_merge (&new_nodes, eclosure); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + else + { + /* There are problematic nodes, re-calculate incrementally. */ + err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + } + re_node_set_free (cur_nodes); + *cur_nodes = new_nodes; + return REG_NOERROR; +} + +/* Helper function for check_arrival_expand_ecl. + Check incrementally the epsilon closure of TARGET, and if it isn't + problematic append it to DST_NODES. 
*/ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, + int target, int ex_subexp, int type) +{ + int cur_node; + for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) + { + int err; + + if (dfa->nodes[cur_node].type == type + && dfa->nodes[cur_node].opr.idx == ex_subexp) + { + if (type == OP_CLOSE_SUBEXP) + { + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + } + break; + } + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + if (dfa->edests[cur_node].nelem == 0) + break; + if (dfa->edests[cur_node].nelem == 2) + { + err = check_arrival_expand_ecl_sub (dfa, dst_nodes, + dfa->edests[cur_node].elems[1], + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + return err; + } + cur_node = dfa->edests[cur_node].elems[0]; + } + return REG_NOERROR; +} + + +/* For all the back references in the current state, calculate the + destination of the back references by the appropriate entry + in MCTX->BKREF_ENTS. */ + +static reg_errcode_t +internal_function +expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, + int cur_str, int subexp_num, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); + struct re_backref_cache_entry *ent; + + if (cache_idx_start == -1) + return REG_NOERROR; + + restart: + ent = mctx->bkref_ents + cache_idx_start; + do + { + int to_idx, next_node; + + /* Is this entry ENT is appropriate? */ + if (!re_node_set_contains (cur_nodes, ent->node)) + continue; /* No. */ + + to_idx = cur_str + ent->subexp_to - ent->subexp_from; + /* Calculate the destination of the back reference, and append it + to MCTX->STATE_LOG. */ + if (to_idx == cur_str) + { + /* The backreference did epsilon transit, we must re-check all the + node in the current state. 
*/ + re_node_set new_dests; + reg_errcode_t err2, err3; + next_node = dfa->edests[ent->node].elems[0]; + if (re_node_set_contains (cur_nodes, next_node)) + continue; + err = re_node_set_init_1 (&new_dests, next_node); + err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); + err3 = re_node_set_merge (cur_nodes, &new_dests); + re_node_set_free (&new_dests); + if (BE (err != REG_NOERROR || err2 != REG_NOERROR + || err3 != REG_NOERROR, 0)) + { + err = (err != REG_NOERROR ? err + : (err2 != REG_NOERROR ? err2 : err3)); + return err; + } + /* TODO: It is still inefficient... */ + goto restart; + } + else + { + re_node_set union_set; + next_node = dfa->nexts[ent->node]; + if (mctx->state_log[to_idx]) + { + int ret; + if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, + next_node)) + continue; + err = re_node_set_init_copy (&union_set, + &mctx->state_log[to_idx]->nodes); + ret = re_node_set_insert (&union_set, next_node); + if (BE (err != REG_NOERROR || ret < 0, 0)) + { + re_node_set_free (&union_set); + err = err != REG_NOERROR ? err : REG_ESPACE; + return err; + } + } + else + { + err = re_node_set_init_1 (&union_set, next_node); + if (BE (err != REG_NOERROR, 0)) + return err; + } + mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); + re_node_set_free (&union_set); + if (BE (mctx->state_log[to_idx] == NULL + && err != REG_NOERROR, 0)) + return err; + } + } + while (ent++->more); + return REG_NOERROR; +} + +/* Build transition table for the state. + Return 1 if succeeded, otherwise return NULL. */ + +static int +internal_function +build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) +{ + reg_errcode_t err; + int i, j, ch, need_word_trtable = 0; + bitset_word_t elem, mask; + bool dests_node_malloced = false; + bool dest_states_malloced = false; + int ndests; /* Number of the destination states from `state'. 
*/ + re_dfastate_t **trtable; + re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; + re_node_set follows, *dests_node; + bitset_t *dests_ch; + bitset_t acceptable; + + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset_t dests_ch[SBC_MAX]; + } *dests_alloc; + + /* We build DFA states which corresponds to the destination nodes + from `state'. `dests_node[i]' represents the nodes which i-th + destination state contains, and `dests_ch[i]' represents the + characters which i-th destination state accepts. */ +#ifdef HAVE_ALLOCA + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); + else +#endif + { + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) + return 0; + dests_node_malloced = true; + } + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; + + /* Initialize transiton table. */ + state->word_trtable = state->trtable = NULL; + + /* At first, group all nodes belonging to `state' into several + destinations. */ + ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); + if (BE (ndests <= 0, 0)) + { + if (dests_node_malloced) + free (dests_alloc); + /* Return 0 in case of an error, 1 otherwise. */ + if (ndests == 0) + { + state->trtable = (re_dfastate_t **) + calloc (sizeof (re_dfastate_t *), SBC_MAX); + return 1; + } + return 0; + } + + err = re_node_set_alloc (&follows, ndests + 1); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + + /* Avoid arithmetic overflow in size calculation. 
*/ + if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX) + / (3 * sizeof (re_dfastate_t *))) + < ndests), + 0)) + goto out_free; + +#ifdef HAVE_ALLOCA + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX + + ndests * 3 * sizeof (re_dfastate_t *))) + dest_states = (re_dfastate_t **) + alloca (ndests * 3 * sizeof (re_dfastate_t *)); + else +#endif + { + dest_states = (re_dfastate_t **) + malloc (ndests * 3 * sizeof (re_dfastate_t *)); + if (BE (dest_states == NULL, 0)) + { +out_free: + if (dest_states_malloced) + free (dest_states); + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + if (dests_node_malloced) + free (dests_alloc); + return 0; + } + dest_states_malloced = true; + } + dest_states_word = dest_states + ndests; + dest_states_nl = dest_states_word + ndests; + bitset_empty (acceptable); + + /* Then build the states for all destinations. */ + for (i = 0; i < ndests; ++i) + { + int next_node; + re_node_set_empty (&follows); + /* Merge the follows of this destination states. */ + for (j = 0; j < dests_node[i].nelem; ++j) + { + next_node = dfa->nexts[dests_node[i].elems[j]]; + if (next_node != -1) + { + err = re_node_set_merge (&follows, dfa->eclosures + next_node); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + } + } + dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); + if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + /* If the new state has context constraint, + build appropriate states for these contexts. 
*/ + if (dest_states[i]->has_constraint) + { + dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_WORD); + if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + + if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) + need_word_trtable = 1; + + dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_NEWLINE); + if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + } + else + { + dest_states_word[i] = dest_states[i]; + dest_states_nl[i] = dest_states[i]; + } + bitset_merge (acceptable, dests_ch[i]); + } + + if (!BE (need_word_trtable, 0)) + { + /* We don't care about whether the following character is a word + character, or we are in a single-byte character set so we can + discern by looking at the character code: allocate a + 256-entry transition table. */ + trtable = state->trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + if (dfa->word_char[i] & mask) + trtable[ch] = dest_states_word[j]; + else + trtable[ch] = dest_states[j]; + } + } + else + { + /* We care about whether the following character is a word + character, and we are in a multi-byte character set: discern + by looking at the character code: build two 256-entry + transition tables, one starting at trtable[0] and one + starting at trtable[SBC_MAX]. 
*/ + trtable = state->word_trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + trtable[ch] = dest_states[j]; + trtable[ch + SBC_MAX] = dest_states_word[j]; + } + } + + /* new line */ + if (bitset_contain (acceptable, NEWLINE_CHAR)) + { + /* The current state accepts newline character. */ + for (j = 0; j < ndests; ++j) + if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) + { + /* k-th destination accepts newline character. */ + trtable[NEWLINE_CHAR] = dest_states_nl[j]; + if (need_word_trtable) + trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; + /* There must be only one destination which accepts + newline. See group_nodes_into_DFAstates. */ + break; + } + } + + if (dest_states_malloced) + free (dest_states); + + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + + if (dests_node_malloced) + free (dests_alloc); + + return 1; +} + +/* Group all nodes belonging to STATE into several destinations. + Then for all destinations, set the nodes belonging to the destination + to DESTS_NODE[i] and set the characters accepted by the destination + to DEST_CH[i]. This function return the number of destinations. */ + +static int +internal_function +group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, + re_node_set *dests_node, bitset_t *dests_ch) +{ + reg_errcode_t err; + int result; + int i, j, k; + int ndests; /* Number of the destinations from `state'. */ + bitset_t accepts; /* Characters a node can accept. 
*/ + const re_node_set *cur_nodes = &state->nodes; + bitset_empty (accepts); + ndests = 0; + + /* For all the nodes belonging to `state', */ + for (i = 0; i < cur_nodes->nelem; ++i) + { + re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + /* Enumerate all single byte character this node can accept. */ + if (type == CHARACTER) + bitset_set (accepts, node->opr.c); + else if (type == SIMPLE_BRACKET) + { + bitset_merge (accepts, node->opr.sbcset); + } + else if (type == OP_PERIOD) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + bitset_merge (accepts, dfa->sb_char); + else +#endif + bitset_set_all (accepts); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#ifdef RE_ENABLE_I18N + else if (type == OP_UTF8_PERIOD) + { + memset (accepts, '\xff', sizeof (bitset_t) / 2); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#endif + else + continue; + + /* Check the `accepts' and sift the characters which are not + match it the context. 
*/ + if (constraint) + { + if (constraint & NEXT_NEWLINE_CONSTRAINT) + { + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bitset_empty (accepts); + if (accepts_newline) + bitset_set (accepts, NEWLINE_CHAR); + else + continue; + } + if (constraint & NEXT_ENDBUF_CONSTRAINT) + { + bitset_empty (accepts); + continue; + } + + if (constraint & NEXT_WORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && !node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= dfa->word_char[j]); + if (!any_set) + continue; + } + if (constraint & NEXT_NOTWORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~dfa->word_char[j]); + if (!any_set) + continue; + } + } + + /* Then divide `accepts' into DFA states, or create a new + state. Above, we make sure that accepts is not empty. */ + for (j = 0; j < ndests; ++j) + { + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; + /* Flags, see below. */ + bitset_word_t has_intersec, not_subset, not_consumed; + + /* Optimization, skip if this state doesn't accept the character. */ + if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) + continue; + + /* Enumerate the intersection set of this state and `accepts'. */ + has_intersec = 0; + for (k = 0; k < BITSET_WORDS; ++k) + has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; + /* And skip if the intersection set is empty. 
*/ + if (!has_intersec) + continue; + + /* Then check if this state is a subset of `accepts'. */ + not_subset = not_consumed = 0; + for (k = 0; k < BITSET_WORDS; ++k) + { + not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; + not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; + } + + /* If this state isn't a subset of `accepts', create a + new group state, which has the `remains'. */ + if (not_subset) + { + bitset_copy (dests_ch[ndests], remains); + bitset_copy (dests_ch[j], intersec); + err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + } + + /* Put the position in the current group. */ + result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (BE (result < 0, 0)) + goto error_return; + + /* If all characters are consumed, go to next node. */ + if (!not_consumed) + break; + } + /* Some characters remain, create a new group. */ + if (j == ndests) + { + bitset_copy (dests_ch[ndests], accepts); + err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + bitset_empty (accepts); + } + } + return ndests; + error_return: + for (j = 0; j < ndests; ++j) + re_node_set_free (dests_node + j); + return -1; +} + +#ifdef RE_ENABLE_I18N +/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. + Return the number of the bytes the node accepts. + STR_IDX is the current index of the input string. + + This function handles the nodes which can accept one character, or + one collating element like '.', '[a-z]', opposite to the other nodes + can only accept one byte. 
*/ + +static int +internal_function +check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int str_idx) +{ + const re_token_t *node = dfa->nodes + node_idx; + int char_len, elem_len; + int i; + wint_t wc; + + if (BE (node->type == OP_UTF8_PERIOD, 0)) + { + unsigned char c = re_string_byte_at (input, str_idx), d; + if (BE (c < 0xc2, 1)) + return 0; + + if (str_idx + 2 > input->len) + return 0; + + d = re_string_byte_at (input, str_idx + 1); + if (c < 0xe0) + return (d < 0x80 || d > 0xbf) ? 0 : 2; + else if (c < 0xf0) + { + char_len = 3; + if (c == 0xe0 && d < 0xa0) + return 0; + } + else if (c < 0xf8) + { + char_len = 4; + if (c == 0xf0 && d < 0x90) + return 0; + } + else if (c < 0xfc) + { + char_len = 5; + if (c == 0xf8 && d < 0x88) + return 0; + } + else if (c < 0xfe) + { + char_len = 6; + if (c == 0xfc && d < 0x84) + return 0; + } + else + return 0; + + if (str_idx + char_len > input->len) + return 0; + + for (i = 1; i < char_len; ++i) + { + d = re_string_byte_at (input, str_idx + i); + if (d < 0x80 || d > 0xbf) + return 0; + } + return char_len; + } + + char_len = re_string_char_size_at (input, str_idx); + if (node->type == OP_PERIOD) + { + if (char_len <= 1) + return 0; + /* FIXME: I don't think this if is needed, as both '\n' + and '\0' are char_len == 1. */ + /* '.' accepts any one character except the following two cases. 
*/ + if ((!(dfa->syntax & RE_DOT_NEWLINE) && + re_string_byte_at (input, str_idx) == '\n') || + ((dfa->syntax & RE_DOT_NOT_NULL) && + re_string_byte_at (input, str_idx) == '\0')) + return 0; + return char_len; + } + + elem_len = re_string_elem_size_at (input, str_idx); + wc = __btowc(*(input->mbs+str_idx)); + if (((elem_len <= 1 && char_len <= 1) || char_len == 0) && (wc != WEOF && wc < SBC_MAX)) + return 0; + + if (node->type == COMPLEX_BRACKET) + { + const re_charset_t *cset = node->opr.mbcset; +# ifdef _LIBC + const unsigned char *pin + = ((const unsigned char *) re_string_get_buffer (input) + str_idx); + int j; + uint32_t nrules; +# endif /* _LIBC */ + int match_len = 0; + wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) + ? re_string_wchar_at (input, str_idx) : 0); + + /* match with multibyte character? */ + for (i = 0; i < cset->nmbchars; ++i) + if (wc == cset->mbchars[i]) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + /* match with character_class? */ + for (i = 0; i < cset->nchar_classes; ++i) + { + wctype_t wt = cset->char_classes[i]; + if (__iswctype (wc, wt)) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + +# ifdef _LIBC + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + unsigned int in_collseq = 0; + const int32_t *table, *indirect; + const unsigned char *weights, *extra; + const char *collseqwc; + /* This #include defines a local function! */ +# include <locale/weight.h> + + /* match with collating_symbol? */ + if (cset->ncoll_syms) + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + for (i = 0; i < cset->ncoll_syms; ++i) + { + const unsigned char *coll_sym = extra + cset->coll_syms[i]; + /* Compare the length of input collating element and + the length of current collating element. */ + if (*coll_sym != elem_len) + continue; + /* Compare each bytes. 
*/ + for (j = 0; j < *coll_sym; j++) + if (pin[j] != coll_sym[1 + j]) + break; + if (j == *coll_sym) + { + /* Match if every bytes is equal. */ + match_len = j; + goto check_node_accept_bytes_match; + } + } + + if (cset->nranges) + { + if (elem_len <= char_len) + { + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + in_collseq = __collseq_table_lookup (collseqwc, wc); + } + else + in_collseq = find_collation_sequence_value (pin, elem_len); + } + /* match with range expression? */ + for (i = 0; i < cset->nranges; ++i) + if (cset->range_starts[i] <= in_collseq + && in_collseq <= cset->range_ends[i]) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + + /* match with equivalence_class? */ + if (cset->nequiv_classes) + { + const unsigned char *cp = pin; + table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); + int32_t idx = findidx (&cp); + if (idx > 0) + for (i = 0; i < cset->nequiv_classes; ++i) + { + int32_t equiv_class_idx = cset->equiv_classes[i]; + size_t weight_len = weights[idx & 0xffffff]; + if (weight_len == weights[equiv_class_idx & 0xffffff] + && (idx >> 24) == (equiv_class_idx >> 24)) + { + int cnt = 0; + + idx &= 0xffffff; + equiv_class_idx &= 0xffffff; + + while (cnt <= weight_len + && (weights[equiv_class_idx + 1 + cnt] + == weights[idx + 1 + cnt])) + ++cnt; + if (cnt > weight_len) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + } + } + } + } + else +# endif /* _LIBC */ + { + /* match with range expression? 
*/ +#if __GNUC__ >= 2 + wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; +#else + wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + cmp_buf[2] = wc; +#endif + for (i = 0; i < cset->nranges; ++i) + { + cmp_buf[0] = cset->range_starts[i]; + cmp_buf[4] = cset->range_ends[i]; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + } + check_node_accept_bytes_match: + if (!cset->non_match) + return match_len; + else + { + if (match_len > 0) + return 0; + else + return (elem_len > char_len) ? elem_len : char_len; + } + } + return 0; +} + +# ifdef _LIBC +static unsigned int +internal_function +find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) +{ + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules == 0) + { + if (mbs_len == 1) + { + /* No valid character. Match it as a single byte character. */ + const unsigned char *collseq = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + return collseq[mbs[0]]; + } + return UINT_MAX; + } + else + { + int32_t idx; + const unsigned char *extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + int32_t extrasize = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; + + for (idx = 0; idx < extrasize;) + { + int mbs_cnt, found = 0; + int32_t elem_mbs_len; + /* Skip the name of collating element name. */ + idx = idx + extra[idx] + 1; + elem_mbs_len = extra[idx++]; + if (mbs_len == elem_mbs_len) + { + for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) + if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) + break; + if (mbs_cnt == elem_mbs_len) + /* Found the entry. */ + found = 1; + } + /* Skip the byte sequence of the collating element. */ + idx += elem_mbs_len; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the collation sequence value. 
*/ + idx += sizeof (uint32_t); + /* Skip the wide char sequence of the collating element. */ + idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + /* If we found the entry, return the sequence value. */ + if (found) + return *(uint32_t *) (extra + idx); + /* Skip the collation sequence value. */ + idx += sizeof (uint32_t); + } + return UINT_MAX; + } +} +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ + +/* Check whether the node accepts the byte which is IDX-th + byte of the INPUT. */ + +static int +internal_function +check_node_accept (const re_match_context_t *mctx, const re_token_t *node, + int idx) +{ + unsigned char ch; + ch = re_string_byte_at (&mctx->input, idx); + switch (node->type) + { + case CHARACTER: + if (node->opr.c != ch) + return 0; + break; + + case SIMPLE_BRACKET: + if (!bitset_contain (node->opr.sbcset, ch)) + return 0; + break; + +#ifdef RE_ENABLE_I18N + case OP_UTF8_PERIOD: + if (ch >= 0x80) + return 0; + /* FALLTHROUGH */ +#endif + case OP_PERIOD: + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return 0; + break; + + default: + return 0; + } + + if (node->constraint) + { + /* The node has constraints. Check whether the current context + satisfies the constraints. */ + unsigned int context = re_string_context_at (&mctx->input, idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + return 0; + } + + return 1; +} + +/* Extend the buffers, if the buffers have run out. */ + +static reg_errcode_t +internal_function +extend_buffers (re_match_context_t *mctx) +{ + reg_errcode_t ret; + re_string_t *pstr = &mctx->input; + + /* Avoid overflow. */ + if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) + return REG_ESPACE; + + /* Double the lengthes of the buffers. 
*/ + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + if (mctx->state_log != NULL) + { + /* And double the length of state_log. */ + /* XXX We have no indication of the size of this buffer. If this + allocation fail we have no indication that the state_log array + does not have the right size. */ + re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len + 1); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->state_log = new_array; + } + + /* Then reconstruct the buffers. */ + if (pstr->icase) + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + } + return REG_NOERROR; +} + + +/* Functions for matching context. */ + +/* Initialize MCTX. */ + +static reg_errcode_t +internal_function +match_ctx_init (re_match_context_t *mctx, int eflags, int n) +{ + mctx->eflags = eflags; + mctx->match_last = -1; + if (n > 0) + { + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); + if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) + return REG_ESPACE; + } + /* Already zero-ed by the caller. + else + mctx->bkref_ents = NULL; + mctx->nbkref_ents = 0; + mctx->nsub_tops = 0; */ + mctx->abkref_ents = n; + mctx->max_mb_elem_len = 1; + mctx->asub_tops = n; + return REG_NOERROR; +} + +/* Clean the entries which depend on the current input in MCTX. + This function must be invoked when the matcher changes the start index + of the input, or changes the input string. 
*/ + +static void +internal_function +match_ctx_clean (re_match_context_t *mctx) +{ + int st_idx; + for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) + { + int sl_idx; + re_sub_match_top_t *top = mctx->sub_tops[st_idx]; + for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) + { + re_sub_match_last_t *last = top->lasts[sl_idx]; + re_free (last->path.array); + re_free (last); + } + re_free (top->lasts); + if (top->path) + { + re_free (top->path->array); + re_free (top->path); + } + free (top); + } + + mctx->nsub_tops = 0; + mctx->nbkref_ents = 0; +} + +/* Free all the memory associated with MCTX. */ + +static void +internal_function +match_ctx_free (re_match_context_t *mctx) +{ + /* First, free all the memory associated with MCTX->SUB_TOPS. */ + match_ctx_clean (mctx); + re_free (mctx->sub_tops); + re_free (mctx->bkref_ents); +} + +/* Add a new backreference entry to MCTX. + Note that we assume that caller never call this function with duplicate + entry, and call with STR_IDX which isn't smaller than any existing entry. 
+*/ + +static reg_errcode_t +internal_function +match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, + int to) +{ + if (mctx->nbkref_ents >= mctx->abkref_ents) + { + struct re_backref_cache_entry* new_entry; + new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, + mctx->abkref_ents * 2); + if (BE (new_entry == NULL, 0)) + { + re_free (mctx->bkref_ents); + return REG_ESPACE; + } + mctx->bkref_ents = new_entry; + memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', + sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); + mctx->abkref_ents *= 2; + } + if (mctx->nbkref_ents > 0 + && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) + mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; + + mctx->bkref_ents[mctx->nbkref_ents].node = node; + mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; + mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; + mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; + + /* This is a cache that saves negative results of check_dst_limits_calc_pos. + If bit N is clear, means that this entry won't epsilon-transition to + an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If + it is set, check_dst_limits_calc_pos_1 will recurse and try to find one + such node. + + A backreference does not epsilon-transition unless it is empty, so set + to all zeros if FROM != TO. */ + mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map + = (from == to ? ~0 : 0); + + mctx->bkref_ents[mctx->nbkref_ents++].more = 0; + if (mctx->max_mb_elem_len < to - from) + mctx->max_mb_elem_len = to - from; + return REG_NOERROR; +} + +/* Search for the first entry which has the same str_idx, or -1 if none is + found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. 
*/ + +static int +internal_function +search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) +{ + int left, right, mid, last; + last = right = mctx->nbkref_ents; + for (left = 0; left < right;) + { + mid = (left + right) / 2; + if (mctx->bkref_ents[mid].str_idx < str_idx) + left = mid + 1; + else + right = mid; + } + if (left < last && mctx->bkref_ents[left].str_idx == str_idx) + return left; + else + return -1; +} + +/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches + at STR_IDX. */ + +static reg_errcode_t +internal_function +match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) +{ +#ifdef DEBUG + assert (mctx->sub_tops != NULL); + assert (mctx->asub_tops > 0); +#endif + if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) + { + int new_asub_tops = mctx->asub_tops * 2; + re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, + re_sub_match_top_t *, + new_asub_tops); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops = new_array; + mctx->asub_tops = new_asub_tops; + } + mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); + if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops[mctx->nsub_tops]->node = node; + mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; + return REG_NOERROR; +} + +/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches + at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 
*/ + +static re_sub_match_last_t * +internal_function +match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) +{ + re_sub_match_last_t *new_entry; + if (BE (subtop->nlasts == subtop->alasts, 0)) + { + int new_alasts = 2 * subtop->alasts + 1; + re_sub_match_last_t **new_array = re_realloc (subtop->lasts, + re_sub_match_last_t *, + new_alasts); + if (BE (new_array == NULL, 0)) + return NULL; + subtop->lasts = new_array; + subtop->alasts = new_alasts; + } + new_entry = calloc (1, sizeof (re_sub_match_last_t)); + if (BE (new_entry != NULL, 1)) + { + subtop->lasts[subtop->nlasts] = new_entry; + new_entry->node = node; + new_entry->str_idx = str_idx; + ++subtop->nlasts; + } + return new_entry; +} + +static void +internal_function +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, int last_str_idx) +{ + sctx->sifted_states = sifted_sts; + sctx->limited_states = limited_sts; + sctx->last_node = last_node; + sctx->last_str_idx = last_str_idx; + re_node_set_init_empty (&sctx->limits); +} diff --git a/compat/strtok_r.c b/compat/strtok_r.c new file mode 100644 index 0000000000..7b5d568a96 --- /dev/null +++ b/compat/strtok_r.c @@ -0,0 +1,61 @@ +/* Reentrant string tokenizer. Generic version. + Copyright (C) 1991,1996-1999,2001,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "../git-compat-util.h" + +/* Parse S into tokens separated by characters in DELIM. + If S is NULL, the saved pointer in SAVE_PTR is used as + the next starting point. For example: + char s[] = "-abc-=-def"; + char *sp; + x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def" + x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL + x = strtok_r(NULL, "=", &sp); // x = NULL + // s = "abc\0-def\0" +*/ +char * +gitstrtok_r (char *s, const char *delim, char **save_ptr) +{ + char *token; + + if (s == NULL) + s = *save_ptr; + + /* Scan leading delimiters. */ + s += strspn (s, delim); + if (*s == '\0') + { + *save_ptr = s; + return NULL; + } + + /* Find the end of the token. */ + token = s; + s = strpbrk (token, delim); + if (s == NULL) + /* This token finishes the string. */ + *save_ptr = token + strlen (token); + else + { + /* Terminate the token and make *SAVE_PTR point past it. */ + *s = '\0'; + *save_ptr = s + 1; + } + return token; +} diff --git a/config.mak.in b/config.mak.in index b4e65c32b2..a0c34eec15 100644 --- a/config.mak.in +++ b/config.mak.in @@ -46,6 +46,7 @@ NO_IPV6=@NO_IPV6@ NO_C99_FORMAT=@NO_C99_FORMAT@ NO_HSTRERROR=@NO_HSTRERROR@ NO_STRCASESTR=@NO_STRCASESTR@ +NO_STRTOK_R=@NO_STRTOK_R@ NO_MEMMEM=@NO_MEMMEM@ NO_STRLCPY=@NO_STRLCPY@ NO_UINTMAX_T=@NO_UINTMAX_T@ @@ -58,6 +59,7 @@ NO_INET_NTOP=@NO_INET_NTOP@ NO_INET_PTON=@NO_INET_PTON@ NO_ICONV=@NO_ICONV@ OLD_ICONV=@OLD_ICONV@ +NO_REGEX=@NO_REGEX@ NO_DEFLATE_BOUND=@NO_DEFLATE_BOUND@ INLINE=@INLINE@ SOCKLEN_T=@SOCKLEN_T@ diff --git a/configure.ac b/configure.ac index 5601e8bac9..56731c35c9 100644 --- a/configure.ac +++ b/configure.ac @@ -706,6 +706,27 @@ else fi AC_SUBST(NO_C99_FORMAT) # +# Define NO_REGEX if you have no or inferior regex support in your C library. 
+AC_CACHE_CHECK([whether the platform regex can handle null bytes], + [ac_cv_c_excellent_regex], [ +AC_EGREP_CPP(yippeeyeswehaveit, + AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT +#include <regex.h> +], +[#ifdef REG_STARTEND +yippeeyeswehaveit +#endif +]), + [ac_cv_c_excellent_regex=yes], + [ac_cv_c_excellent_regex=no]) +]) +if test $ac_cv_c_excellent_regex = yes; then + NO_REGEX= +else + NO_REGEX=YesPlease +fi +AC_SUBST(NO_REGEX) +# # Define FREAD_READS_DIRECTORIES if your are on a system which succeeds # when attempting to read from an fopen'ed directory. AC_CACHE_CHECK([whether system succeeds to read fopen'ed directory], @@ -783,6 +804,12 @@ GIT_CHECK_FUNC(strcasestr, [NO_STRCASESTR=YesPlease]) AC_SUBST(NO_STRCASESTR) # +# Define NO_STRTOK_R if you don't have strtok_r +GIT_CHECK_FUNC(strtok_r, +[NO_STRTOK_R=], +[NO_STRTOK_R=YesPlease]) +AC_SUBST(NO_STRTOK_R) +# # Define NO_MEMMEM if you don't have memmem. GIT_CHECK_FUNC(memmem, [NO_MEMMEM=], diff --git a/contrib/examples/git-commit.sh b/contrib/examples/git-commit.sh index 5c72f655c7..23ffb028d1 100755 --- a/contrib/examples/git-commit.sh +++ b/contrib/examples/git-commit.sh @@ -631,7 +631,7 @@ then if test -z "$quiet" then commit=`git diff-tree --always --shortstat --pretty="format:%h: %s"\ - --summary --root HEAD --` + --abbrev --summary --root HEAD --` echo "Created${initial_commit:+ initial} commit $commit" fi fi diff --git a/contrib/examples/git-merge.sh b/contrib/examples/git-merge.sh index 8f617fcb70..7b922c3948 100755 --- a/contrib/examples/git-merge.sh +++ b/contrib/examples/git-merge.sh @@ -15,7 +15,10 @@ log add list of one-line log to merge commit message squash create a single commit instead of doing a merge commit perform a commit if the merge succeeds (default) ff allow fast-forward (default) +ff-only abort if fast-forward is not possible +rerere-autoupdate update index with any reused conflict resolution s,strategy= merge strategy to use +X= option for selected merge strategy m,message= message to be 
used for the merge commit (if any) " @@ -25,26 +28,32 @@ require_work_tree cd_to_toplevel test -z "$(git ls-files -u)" || - die "You are in the middle of a conflicted merge." + die "Merge is not possible because you have unmerged files." + +! test -e "$GIT_DIR/MERGE_HEAD" || + die 'You have not concluded your merge (MERGE_HEAD exists).' LF=' ' all_strategies='recur recursive octopus resolve stupid ours subtree' all_strategies="$all_strategies recursive-ours recursive-theirs" +not_strategies='base file index tree' default_twohead_strategies='recursive' default_octopus_strategies='octopus' no_fast_forward_strategies='subtree ours' no_trivial_strategies='recursive recur subtree ours recursive-ours recursive-theirs' use_strategies= +xopt= allow_fast_forward=t +fast_forward_only= allow_trivial_merge=t -squash= no_commit= log_arg= +squash= no_commit= log_arg= rr_arg= dropsave() { rm -f -- "$GIT_DIR/MERGE_HEAD" "$GIT_DIR/MERGE_MSG" \ - "$GIT_DIR/MERGE_STASH" || exit 1 + "$GIT_DIR/MERGE_STASH" "$GIT_DIR/MERGE_MODE" || exit 1 } savestate() { @@ -131,21 +140,34 @@ finish () { merge_name () { remote="$1" rh=$(git rev-parse --verify "$remote^0" 2>/dev/null) || return - bh=$(git show-ref -s --verify "refs/heads/$remote" 2>/dev/null) - if test "$rh" = "$bh" - then - echo "$rh branch '$remote' of ." - elif truname=$(expr "$remote" : '\(.*\)~[1-9][0-9]*$') && + if truname=$(expr "$remote" : '\(.*\)~[0-9]*$') && git show-ref -q --verify "refs/heads/$truname" 2>/dev/null then echo "$rh branch '$truname' (early part) of ." - elif test "$remote" = "FETCH_HEAD" -a -r "$GIT_DIR/FETCH_HEAD" + return + fi + if found_ref=$(git rev-parse --symbolic-full-name --verify \ + "$remote" 2>/dev/null) + then + expanded=$(git check-ref-format --branch "$remote") || + exit + if test "${found_ref#refs/heads/}" != "$found_ref" + then + echo "$rh branch '$expanded' of ." + return + elif test "${found_ref#refs/remotes/}" != "$found_ref" + then + echo "$rh remote branch '$expanded' of ." 
+ return + fi + fi + if test "$remote" = "FETCH_HEAD" -a -r "$GIT_DIR/FETCH_HEAD" then sed -e 's/ not-for-merge / /' -e 1q \ "$GIT_DIR/FETCH_HEAD" - else - echo "$rh commit '$remote'" + return fi + echo "$rh commit '$remote'" } parse_config () { @@ -172,16 +194,36 @@ parse_config () { --no-ff) test "$squash" != t || die "You cannot combine --squash with --no-ff." + test "$fast_forward_only" != t || + die "You cannot combine --ff-only with --no-ff." allow_fast_forward=f ;; + --ff-only) + test "$allow_fast_forward" != f || + die "You cannot combine --ff-only with --no-ff." + fast_forward_only=t ;; + --rerere-autoupdate|--no-rerere-autoupdate) + rr_arg=$1 ;; -s|--strategy) shift case " $all_strategies " in *" $1 "*) - use_strategies="$use_strategies$1 " ;; + use_strategies="$use_strategies$1 " + ;; *) - die "available strategies are: $all_strategies" ;; + case " $not_strategies " in + *" $1 "*) + false + esac && + type "git-merge-$1" >/dev/null 2>&1 || + die "available strategies are: $all_strategies" + use_strategies="$use_strategies$1 " + ;; esac ;; + -X) + shift + xopt="${xopt:+$xopt }$(git rev-parse --sq-quote "--$1")" + ;; -m|--message) shift merge_msg="$1" @@ -245,6 +287,10 @@ then exit 1 fi + test "$squash" != t || + die "Squash commit into empty head not supported yet" + test "$allow_fast_forward" = t || + die "Non-fast-forward into an empty head does not make sense" rh=$(git rev-parse --verify "$1^0") || die "$1 - not something we can merge" @@ -261,12 +307,18 @@ else # the given message. If remote is invalid we will die # later in the common codepath so we discard the error # in this loop. 
- merge_name=$(for remote + merge_msg="$( + for remote do merge_name "$remote" - done | git fmt-merge-msg $log_arg - ) - merge_msg="${merge_msg:+$merge_msg$LF$LF}$merge_name" + done | + if test "$have_message" = t + then + git fmt-merge-msg -m "$merge_msg" $log_arg + else + git fmt-merge-msg $log_arg + fi + )" fi head=$(git rev-parse --verify "$head_arg"^0) || usage @@ -335,7 +387,7 @@ case "$#" in common=$(git merge-base --all $head "$@") ;; *) - common=$(git show-branch --merge-base $head "$@") + common=$(git merge-base --all --octopus $head "$@") ;; esac echo "$head" >"$GIT_DIR/ORIG_HEAD" @@ -373,8 +425,8 @@ t,1,"$head",*) # We are not doing octopus, not fast-forward, and have only # one common. git update-index --refresh 2>/dev/null - case "$allow_trivial_merge" in - t) + case "$allow_trivial_merge,$fast_forward_only" in + t,) # See if it is really trivial. git var GIT_COMMITTER_IDENT >/dev/null || exit echo "Trying really trivial in-index merge..." @@ -413,6 +465,11 @@ t,1,"$head",*) ;; esac +if test "$fast_forward_only" = t +then + die "Not possible to fast-forward, aborting." +fi + # We are going to make a new commit. git var GIT_COMMITTER_IDENT >/dev/null || exit @@ -451,7 +508,7 @@ do # Remember which strategy left the state in the working tree wt_strategy=$strategy - git-merge-$strategy $common -- "$head_arg" "$@" + eval 'git-merge-$strategy '"$xopt"' $common -- "$head_arg" "$@"' exit=$? 
if test "$no_commit" = t && test "$exit" = 0 then @@ -489,9 +546,9 @@ if test '' != "$result_tree" then if test "$allow_fast_forward" = "t" then - parents=$(git show-branch --independent "$head" "$@") + parents=$(git merge-base --independent "$head" "$@") else - parents=$(git rev-parse "$head" "$@") + parents=$(git rev-parse "$head" "$@") fi parents=$(echo "$parents" | sed -e 's/^/-p /') result_commit=$(printf '%s\n' "$merge_msg" | git commit-tree $result_tree $parents) || exit @@ -533,7 +590,15 @@ else do echo $remote done >"$GIT_DIR/MERGE_HEAD" - printf '%s\n' "$merge_msg" >"$GIT_DIR/MERGE_MSG" + printf '%s\n' "$merge_msg" >"$GIT_DIR/MERGE_MSG" || + die "Could not write to $GIT_DIR/MERGE_MSG" + if test "$allow_fast_forward" != t + then + printf "%s" no-ff + else + : + fi >"$GIT_DIR/MERGE_MODE" || + die "Could not write to $GIT_DIR/MERGE_MODE" fi if test "$merge_was_ok" = t @@ -550,6 +615,6 @@ Conflicts: sed -e 's/^[^ ]* / /' | uniq } >>"$GIT_DIR/MERGE_MSG" - git rerere + git rerere $rr_arg die "Automatic merge failed; fix conflicts and then commit the result." fi diff --git a/contrib/examples/git-revert.sh b/contrib/examples/git-revert.sh index 49f00321b2..60a05a8b97 100755 --- a/contrib/examples/git-revert.sh +++ b/contrib/examples/git-revert.sh @@ -181,7 +181,6 @@ Conflicts: esac exit 1 } -echo >&2 "Finished one $me." # If we are cherry-pick, and if the merge did not result in # hand-editing, we will hit this commit and inherit the original diff --git a/contrib/hooks/post-receive-email b/contrib/hooks/post-receive-email index 30ae63d74d..0085086437 100755 --- a/contrib/hooks/post-receive-email +++ b/contrib/hooks/post-receive-email @@ -55,6 +55,11 @@ # "t=%s; printf 'http://.../?id=%%s' \$t; echo;echo; git show -C \$t; echo" # Be careful if "..." contains things that will be expanded by shell "eval" # or printf. +# hooks.emailmaxlines +# The maximum number of lines that should be included in the generated +# email body. If not specified, there is no limit. 
+# Lines beyond the limit are suppressed and counted, and a final +# line is added indicating the number of suppressed lines. # # Notes # ----- @@ -84,6 +89,7 @@ generate_email() oldrev=$(git rev-parse $1) newrev=$(git rev-parse $2) refname="$3" + maxlines=$4 # --- Interpret # 0000->1234 (create) @@ -192,7 +198,12 @@ generate_email() fn_name=atag ;; esac - generate_${change_type}_${fn_name}_email + + if [ -z "$maxlines" ]; then + generate_${change_type}_${fn_name}_email + else + generate_${change_type}_${fn_name}_email | limit_lines $maxlines + fi generate_email_footer } @@ -203,7 +214,7 @@ generate_email_header() # Generate header cat <<-EOF To: $recipients - Subject: ${emailprefix}$projectdesc $refname_type, $short_refname, ${change_type}d. $describe + Subject: ${emailprefix}$projectdesc $refname_type $short_refname ${change_type}d. $describe X-Git-Refname: $refname X-Git-Reftype: $refname_type X-Git-Oldrev: $oldrev @@ -642,6 +653,24 @@ show_new_revisions() } +limit_lines() +{ + lines=0 + skipped=0 + while IFS="" read -r line; do + lines=$((lines + 1)) + if [ $lines -gt $1 ]; then + skipped=$((skipped + 1)) + else + printf "%s\n" "$line" + fi + done + if [ $skipped -ne 0 ]; then + echo "... $skipped lines suppressed ..." 
+ fi +} + + send_mail() { if [ -n "$envelopesender" ]; then @@ -679,6 +708,7 @@ announcerecipients=$(git config hooks.announcelist) envelopesender=$(git config hooks.envelopesender) emailprefix=$(git config hooks.emailprefix || echo '[SCM] ') custom_showrev=$(git config hooks.showrev) +maxlines=$(git config hooks.emailmaxlines) # --- Main loop # Allow dual mode: run from the command line just like the update hook, or @@ -691,6 +721,6 @@ if [ -n "$1" -a -n "$2" -a -n "$3" ]; then else while read oldrev newrev refname do - generate_email $oldrev $newrev $refname | send_mail + generate_email $oldrev $newrev $refname $maxlines | send_mail done fi diff --git a/contrib/svn-fe/.gitignore b/contrib/svn-fe/.gitignore index 27a33b669e..02a7791585 100644 --- a/contrib/svn-fe/.gitignore +++ b/contrib/svn-fe/.gitignore @@ -1,3 +1,4 @@ /*.xml /*.1 /*.html +/svn-fe diff --git a/contrib/svn-fe/Makefile b/contrib/svn-fe/Makefile index 4cc8d15827..360d8da417 100644 --- a/contrib/svn-fe/Makefile +++ b/contrib/svn-fe/Makefile @@ -38,7 +38,7 @@ svn-fe$X: svn-fe.o $(VCSSVN_LIB) $(GIT_LIB) $(ALL_LDFLAGS) $(LIBS) svn-fe.o: svn-fe.c ../../vcs-svn/svndump.h - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< + $(QUIET_CC)$(CC) -I../../vcs-svn -o $*.o -c $(ALL_CFLAGS) $< svn-fe.html: svn-fe.txt $(QUIET_SUBDIR0)../../Documentation $(QUIET_SUBDIR1) \ diff --git a/contrib/svn-fe/svn-fe.c b/contrib/svn-fe/svn-fe.c index 43c4320cac..a2677b03e0 100644 --- a/contrib/svn-fe/svn-fe.c +++ b/contrib/svn-fe/svn-fe.c @@ -4,12 +4,13 @@ */ #include <stdlib.h> -#include "vcs-svn/svndump.h" +#include "svndump.h" int main(int argc, char **argv) { svndump_init(NULL); svndump_read((argc > 1) ? 
argv[1] : NULL); + svndump_deinit(); svndump_reset(); return 0; } diff --git a/contrib/svn-fe/svn-fe.txt b/contrib/svn-fe/svn-fe.txt index de30f83a1f..35f84bd9e7 100644 --- a/contrib/svn-fe/svn-fe.txt +++ b/contrib/svn-fe/svn-fe.txt @@ -12,7 +12,7 @@ svnadmin dump --incremental REPO | svn-fe [url] | git fast-import DESCRIPTION ----------- -Converts a Subversion dumpfile (version: 2) into input suitable for +Converts a Subversion dumpfile into input suitable for git-fast-import(1) and similar importers. REPO is a path to a Subversion repository mirrored on the local disk. Remote Subversion repositories can be mirrored on local disk using the `svnsync` @@ -25,6 +25,9 @@ Subversion's repository dump format is documented in full in Files in this format can be generated using the 'svnadmin dump' or 'svk admin dump' command. +Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3) +are not supported. + OUTPUT FORMAT ------------- The fast-import format is documented by the git-fast-import(1) @@ -43,11 +46,9 @@ user <user@UUID> as committer, where 'user' is the value of the `svn:author` property and 'UUID' the repository's identifier. -To support incremental imports, 'svn-fe' will put a `git-svn-id` -line at the end of each commit log message if passed an url on the -command line. This line has the form `git-svn-id: URL@REVNO UUID`. - -Empty directories and unknown properties are silently discarded. +To support incremental imports, 'svn-fe' puts a `git-svn-id` line at +the end of each commit log message if passed an url on the command +line. This line has the form `git-svn-id: URL@REVNO UUID`. The resulting repository will generally require further processing to put each project in its own repository and to separate the history @@ -56,9 +57,9 @@ may be useful for this purpose. BUGS ---- -Litters the current working directory with .bin files for -persistence. Will be fixed when the svn-fe infrastructure is aware of -a Git working directory. 
+Empty directories and unknown properties are silently discarded. + +The exit status does not reflect whether an error was detected. SEE ALSO -------- diff --git a/contrib/workdir/git-new-workdir b/contrib/workdir/git-new-workdir index 993cacf324..3ad2c0cea5 100755 --- a/contrib/workdir/git-new-workdir +++ b/contrib/workdir/git-new-workdir @@ -54,13 +54,13 @@ then die "destination directory '$new_workdir' already exists." fi -# make sure the the links use full paths +# make sure the links use full paths git_dir=$(cd "$git_dir"; pwd) # create the workdir mkdir -p "$new_workdir/.git" || die "unable to create \"$new_workdir\"!" -# create the links to the original repo. explictly exclude index, HEAD and +# create the links to the original repo. explicitly exclude index, HEAD and # logs/HEAD from the list since they are purely related to the current working # directory, and should not be shared. for x in config refs logs/refs objects info hooks packed-refs remotes rr-cache svn @@ -93,7 +93,8 @@ static int is_binary(unsigned long size, struct text_stat *stats) return 0; } -static enum eol determine_output_conversion(enum action action) { +static enum eol determine_output_conversion(enum action action) +{ switch (action) { case CRLF_BINARY: return EOL_UNSET; @@ -693,7 +694,8 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check) return !!ATTR_TRUE(value); } -enum action determine_action(enum action text_attr, enum eol eol_attr) { +static enum action determine_action(enum action text_attr, enum eol eol_attr) +{ if (text_attr == CRLF_BINARY) return CRLF_BINARY; if (eol_attr == EOL_LF) @@ -739,7 +741,9 @@ int convert_to_git(const char *path, const char *src, size_t len, return ret | ident_to_git(path, src, len, dst, ident); } -int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst) +static int convert_to_working_tree_internal(const char *path, const char *src, + size_t len, struct strbuf *dst, + int 
normalizing) { struct git_attr_check check[5]; enum action action = CRLF_GUESS; @@ -765,11 +769,32 @@ int convert_to_working_tree(const char *path, const char *src, size_t len, struc src = dst->buf; len = dst->len; } - action = determine_action(action, eol_attr); - ret |= crlf_to_worktree(path, src, len, dst, action); + /* + * CRLF conversion can be skipped if normalizing, unless there + * is a smudge filter. The filter might expect CRLFs. + */ + if (filter || !normalizing) { + action = determine_action(action, eol_attr); + ret |= crlf_to_worktree(path, src, len, dst, action); + if (ret) { + src = dst->buf; + len = dst->len; + } + } + return ret | apply_filter(path, src, len, dst, filter); +} + +int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst) +{ + return convert_to_working_tree_internal(path, src, len, dst, 0); +} + +int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst) +{ + int ret = convert_to_working_tree_internal(path, src, len, dst, 1); if (ret) { src = dst->buf; len = dst->len; } - return ret | apply_filter(path, src, len, dst, filter); + return ret | convert_to_git(path, src, len, dst, 0); } @@ -586,7 +586,7 @@ static int date_string(unsigned long date, int offset, char *buf, int len) /* Gr. strptime is crap for this; it doesn't have a way to require RFC2822 (i.e. English) day/month names, and it doesn't work correctly with %z. 
*/ -int parse_date_toffset(const char *date, unsigned long *timestamp, int *offset) +int parse_date_basic(const char *date, unsigned long *timestamp, int *offset) { struct tm tm; int tm_gmt; @@ -642,17 +642,16 @@ int parse_date_toffset(const char *date, unsigned long *timestamp, int *offset) if (!tm_gmt) *timestamp -= *offset * 60; - return 1; /* success */ + return 0; /* success */ } int parse_date(const char *date, char *result, int maxlen) { unsigned long timestamp; int offset; - if (parse_date_toffset(date, &timestamp, &offset) > 0) - return date_string(timestamp, offset, result, maxlen); - else + if (parse_date_basic(date, &timestamp, &offset)) return -1; + return date_string(timestamp, offset, result, maxlen); } enum date_mode parse_date_format(const char *format) @@ -1004,9 +1003,8 @@ unsigned long approxidate_relative(const char *date, const struct timeval *tv) int offset; int errors = 0; - if (parse_date_toffset(date, &timestamp, &offset) > 0) + if (!parse_date_basic(date, &timestamp, &offset)) return timestamp; - return approxidate_str(date, tv, &errors); } @@ -1019,7 +1017,7 @@ unsigned long approxidate_careful(const char *date, int *error_ret) if (!error_ret) error_ret = &dummy; - if (parse_date_toffset(date, &timestamp, &offset) > 0) { + if (!parse_date_basic(date, &timestamp, &offset)) { *error_ret = 0; return timestamp; } diff --git a/diff-delta.c b/diff-delta.c index 464ac3ffc0..93385e12ba 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -146,7 +146,14 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) /* Determine index hash size. Note that indexing skips the first byte to allow for optimizing the Rabin's polynomial initialization in create_delta(). */ - entries = (bufsize - 1) / RABIN_WINDOW; + entries = (bufsize - 1) / RABIN_WINDOW; + if (bufsize >= 0xffffffffUL) { + /* + * Current delta format can't encode offsets into + * reference buffer with more than 32 bits. 
+ */ + entries = 0xfffffffeU / RABIN_WINDOW; + } hsize = entries / 4; for (i = 4; (1u << i) < hsize && i < 31; i++); hsize = 1 << i; diff --git a/diff-lib.c b/diff-lib.c index 8b8978ae6d..392ce2bef0 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -68,11 +68,16 @@ static int match_stat_with_submodule(struct diff_options *diffopt, unsigned ce_option, unsigned *dirty_submodule) { int changed = ce_match_stat(ce, st, ce_option); - if (S_ISGITLINK(ce->ce_mode) - && !DIFF_OPT_TST(diffopt, IGNORE_SUBMODULES) - && !DIFF_OPT_TST(diffopt, IGNORE_DIRTY_SUBMODULES) - && (!changed || DIFF_OPT_TST(diffopt, DIRTY_SUBMODULES))) { - *dirty_submodule = is_submodule_modified(ce->name, DIFF_OPT_TST(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES)); + if (S_ISGITLINK(ce->ce_mode)) { + unsigned orig_flags = diffopt->flags; + if (!DIFF_OPT_TST(diffopt, OVERRIDE_SUBMODULE_CONFIG)) + set_diffopt_flags_from_submodule_config(diffopt, ce->name); + if (DIFF_OPT_TST(diffopt, IGNORE_SUBMODULES)) + changed = 0; + else if (!DIFF_OPT_TST(diffopt, IGNORE_DIRTY_SUBMODULES) + && (!changed || DIFF_OPT_TST(diffopt, DIRTY_SUBMODULES))) + *dirty_submodule = is_submodule_modified(ce->name, DIFF_OPT_TST(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES)); + diffopt->flags = orig_flags; } return changed; } diff --git a/diff-no-index.c b/diff-no-index.c index 43aeeba2e0..ce9e783407 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -64,7 +64,8 @@ static int queue_diff(struct diff_options *o, if (S_ISDIR(mode1) || S_ISDIR(mode2)) { char buffer1[PATH_MAX], buffer2[PATH_MAX]; - struct string_list p1 = {NULL, 0, 0, 1}, p2 = {NULL, 0, 0, 1}; + struct string_list p1 = STRING_LIST_INIT_DUP; + struct string_list p2 = STRING_LIST_INIT_DUP; int len1 = 0, len2 = 0, i1, i2, ret = 0; if (name1 && read_directory(name1, &p1)) @@ -31,6 +31,7 @@ static const char *external_diff_cmd_cfg; int diff_auto_refresh_index = 1; static int diff_mnemonic_prefix; static int diff_no_prefix; +static struct diff_options default_diff_options; static char 
diff_colors[][COLOR_MAXLEN] = { GIT_COLOR_RESET, @@ -107,6 +108,9 @@ int git_diff_ui_config(const char *var, const char *value, void *cb) if (!strcmp(var, "diff.wordregex")) return git_config_string(&diff_word_regex_cfg, var, value); + if (!strcmp(var, "diff.ignoresubmodules")) + handle_ignore_submodules_arg(&default_diff_options, value); + return git_diff_basic_config(var, value, cb); } @@ -141,6 +145,9 @@ int git_diff_basic_config(const char *var, const char *value, void *cb) return 0; } + if (!prefixcmp(var, "submodule.")) + return parse_submodule_config_option(var, value); + return git_color_default_config(var, value, cb); } @@ -2704,10 +2711,16 @@ static void diff_fill_sha1_info(struct diff_filespec *one) static void strip_prefix(int prefix_length, const char **namep, const char **otherp) { /* Strip the prefix but do not molest /dev/null and absolute paths */ - if (*namep && **namep != '/') + if (*namep && **namep != '/') { *namep += prefix_length; - if (*otherp && **otherp != '/') + if (**namep == '/') + ++*namep; + } + if (*otherp && **otherp != '/') { *otherp += prefix_length; + if (**otherp == '/') + ++*otherp; + } } static void run_diff(struct diff_filepair *p, struct diff_options *o) @@ -2813,8 +2826,7 @@ static void run_checkdiff(struct diff_filepair *p, struct diff_options *o) void diff_setup(struct diff_options *options) { - memset(options, 0, sizeof(*options)); - memset(&diff_queued_diff, 0, sizeof(diff_queued_diff)); + memcpy(options, &default_diff_options, sizeof(*options)); options->file = stdout; @@ -2990,9 +3002,100 @@ static int opt_arg(const char *arg, int arg_short, const char *arg_long, int *va static int diff_scoreopt_parse(const char *opt); +static inline int short_opt(char opt, const char **argv, + const char **optarg) +{ + const char *arg = argv[0]; + if (arg[0] != '-' || arg[1] != opt) + return 0; + if (arg[2] != '\0') { + *optarg = arg + 2; + return 1; + } + if (!argv[1]) + die("Option '%c' requires a value", opt); + *optarg = argv[1]; 
+ return 2; +} + +int parse_long_opt(const char *opt, const char **argv, + const char **optarg) +{ + const char *arg = argv[0]; + if (arg[0] != '-' || arg[1] != '-') + return 0; + arg += strlen("--"); + if (prefixcmp(arg, opt)) + return 0; + arg += strlen(opt); + if (*arg == '=') { /* sticked form: --option=value */ + *optarg = arg + 1; + return 1; + } + if (*arg != '\0') + return 0; + /* separate form: --option value */ + if (!argv[1]) + die("Option '--%s' requires a value", opt); + *optarg = argv[1]; + return 2; +} + +static int stat_opt(struct diff_options *options, const char **av) +{ + const char *arg = av[0]; + char *end; + int width = options->stat_width; + int name_width = options->stat_name_width; + int argcount = 1; + + arg += strlen("--stat"); + end = (char *)arg; + + switch (*arg) { + case '-': + if (!prefixcmp(arg, "-width")) { + arg += strlen("-width"); + if (*arg == '=') + width = strtoul(arg + 1, &end, 10); + else if (!*arg && !av[1]) + die("Option '--stat-width' requires a value"); + else if (!*arg) { + width = strtoul(av[1], &end, 10); + argcount = 2; + } + } else if (!prefixcmp(arg, "-name-width")) { + arg += strlen("-name-width"); + if (*arg == '=') + name_width = strtoul(arg + 1, &end, 10); + else if (!*arg && !av[1]) + die("Option '--stat-name-width' requires a value"); + else if (!*arg) { + name_width = strtoul(av[1], &end, 10); + argcount = 2; + } + } + break; + case '=': + width = strtoul(arg+1, &end, 10); + if (*end == ',') + name_width = strtoul(end+1, &end, 10); + } + + /* Important! This checks all the error cases! 
*/ + if (*end) + return 0; + options->output_format |= DIFF_FORMAT_DIFFSTAT; + options->stat_name_width = name_width; + options->stat_width = width; + return argcount; +} + int diff_opt_parse(struct diff_options *options, const char **av, int ac) { const char *arg = av[0]; + const char *optarg; + int argcount; /* Output format options */ if (!strcmp(arg, "-p") || !strcmp(arg, "-u") || !strcmp(arg, "--patch")) @@ -3029,33 +3132,9 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) options->output_format |= DIFF_FORMAT_NAME_STATUS; else if (!strcmp(arg, "-s")) options->output_format |= DIFF_FORMAT_NO_OUTPUT; - else if (!prefixcmp(arg, "--stat")) { - char *end; - int width = options->stat_width; - int name_width = options->stat_name_width; - arg += 6; - end = (char *)arg; - - switch (*arg) { - case '-': - if (!prefixcmp(arg, "-width=")) - width = strtoul(arg + 7, &end, 10); - else if (!prefixcmp(arg, "-name-width=")) - name_width = strtoul(arg + 12, &end, 10); - break; - case '=': - width = strtoul(arg+1, &end, 10); - if (*end == ',') - name_width = strtoul(end+1, &end, 10); - } - - /* Important! This checks all the error cases! 
*/ - if (*end) - return 0; - options->output_format |= DIFF_FORMAT_DIFFSTAT; - options->stat_name_width = name_width; - options->stat_width = width; - } + else if (!prefixcmp(arg, "--stat")) + /* --stat, --stat-width, or --stat-name-width */ + return stat_opt(options, av); /* renames options */ else if (!prefixcmp(arg, "-B")) { @@ -3149,10 +3228,11 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) else die("bad --word-diff argument: %s", type); } - else if (!prefixcmp(arg, "--word-diff-regex=")) { + else if ((argcount = parse_long_opt("word-diff-regex", av, &optarg))) { if (options->word_diff == DIFF_WORDS_NONE) options->word_diff = DIFF_WORDS_PLAIN; - options->word_regex = arg + 18; + options->word_regex = optarg; + return argcount; } else if (!strcmp(arg, "--exit-code")) DIFF_OPT_SET(options, EXIT_WITH_STATUS); @@ -3166,11 +3246,13 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) DIFF_OPT_SET(options, ALLOW_TEXTCONV); else if (!strcmp(arg, "--no-textconv")) DIFF_OPT_CLR(options, ALLOW_TEXTCONV); - else if (!strcmp(arg, "--ignore-submodules")) + else if (!strcmp(arg, "--ignore-submodules")) { + DIFF_OPT_SET(options, OVERRIDE_SUBMODULE_CONFIG); handle_ignore_submodules_arg(options, "all"); - else if (!prefixcmp(arg, "--ignore-submodules=")) + } else if (!prefixcmp(arg, "--ignore-submodules=")) { + DIFF_OPT_SET(options, OVERRIDE_SUBMODULE_CONFIG); handle_ignore_submodules_arg(options, arg + 20); - else if (!strcmp(arg, "--submodule")) + } else if (!strcmp(arg, "--submodule")) DIFF_OPT_SET(options, SUBMODULE_LOG); else if (!prefixcmp(arg, "--submodule=")) { if (!strcmp(arg + 12, "log")) @@ -3180,18 +3262,26 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) /* misc options */ else if (!strcmp(arg, "-z")) options->line_termination = 0; - else if (!prefixcmp(arg, "-l")) - options->rename_limit = strtoul(arg+2, NULL, 10); - else if (!prefixcmp(arg, "-S")) - options->pickaxe = arg + 2; + 
else if ((argcount = short_opt('l', av, &optarg))) { + options->rename_limit = strtoul(optarg, NULL, 10); + return argcount; + } + else if ((argcount = short_opt('S', av, &optarg))) { + options->pickaxe = optarg; + return argcount; + } else if (!strcmp(arg, "--pickaxe-all")) options->pickaxe_opts = DIFF_PICKAXE_ALL; else if (!strcmp(arg, "--pickaxe-regex")) options->pickaxe_opts = DIFF_PICKAXE_REGEX; - else if (!prefixcmp(arg, "-O")) - options->orderfile = arg + 2; - else if (!prefixcmp(arg, "--diff-filter=")) - options->filter = arg + 14; + else if ((argcount = short_opt('O', av, &optarg))) { + options->orderfile = optarg; + return argcount; + } + else if ((argcount = parse_long_opt("diff-filter", av, &optarg))) { + options->filter = optarg; + return argcount; + } else if (!strcmp(arg, "--abbrev")) options->abbrev = DEFAULT_ABBREV; else if (!prefixcmp(arg, "--abbrev=")) { @@ -3201,20 +3291,25 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) else if (40 < options->abbrev) options->abbrev = 40; } - else if (!prefixcmp(arg, "--src-prefix=")) - options->a_prefix = arg + 13; - else if (!prefixcmp(arg, "--dst-prefix=")) - options->b_prefix = arg + 13; + else if ((argcount = parse_long_opt("src-prefix", av, &optarg))) { + options->a_prefix = optarg; + return argcount; + } + else if ((argcount = parse_long_opt("dst-prefix", av, &optarg))) { + options->b_prefix = optarg; + return argcount; + } else if (!strcmp(arg, "--no-prefix")) options->a_prefix = options->b_prefix = ""; else if (opt_arg(arg, '\0', "inter-hunk-context", &options->interhunkcontext)) ; - else if (!prefixcmp(arg, "--output=")) { - options->file = fopen(arg + strlen("--output="), "w"); + else if ((argcount = parse_long_opt("output", av, &optarg))) { + options->file = fopen(optarg, "w"); if (!options->file) die_errno("Could not open '%s'", arg + strlen("--output=")); options->close_file = 1; + return argcount; } else return 0; return 1; @@ -3758,6 +3853,13 @@ static int 
diff_get_patch_id(struct diff_options *options, unsigned char *sha1) len2, p->two->path); git_SHA1_Update(&ctx, buffer, len1); + if (diff_filespec_is_binary(p->one) || + diff_filespec_is_binary(p->two)) { + git_SHA1_Update(&ctx, sha1_to_hex(p->one->sha1), 40); + git_SHA1_Update(&ctx, sha1_to_hex(p->two->sha1), 40); + continue; + } + xpp.flags = 0; xecfg.ctxlen = 3; xecfg.flags = XDL_EMIT_FUNCNAMES; @@ -4059,25 +4161,24 @@ void diffcore_fix_diff_index(struct diff_options *options) void diffcore_std(struct diff_options *options) { - /* We never run this function more than one time, because the - * rename/copy detection logic can only run once. - */ - if (diff_queued_diff.run) - return; - if (options->skip_stat_unmatch) diffcore_skip_stat_unmatch(options); - if (options->break_opt != -1) - diffcore_break(options->break_opt); - if (options->detect_rename) - diffcore_rename(options); - if (options->break_opt != -1) - diffcore_merge_broken(); + if (!options->found_follow) { + /* See try_to_follow_renames() in tree-diff.c */ + if (options->break_opt != -1) + diffcore_break(options->break_opt); + if (options->detect_rename) + diffcore_rename(options); + if (options->break_opt != -1) + diffcore_merge_broken(); + } if (options->pickaxe) diffcore_pickaxe(options->pickaxe, options->pickaxe_opts); if (options->orderfile) diffcore_order(options->orderfile); - diff_resolve_rename_copy(); + if (!options->found_follow) + /* See try_to_follow_renames() in tree-diff.c */ + diff_resolve_rename_copy(); diffcore_apply_filter(options->filter); if (diff_queued_diff.nr && !DIFF_OPT_TST(options, DIFF_FROM_CONTENTS)) @@ -4085,7 +4186,7 @@ void diffcore_std(struct diff_options *options) else DIFF_OPT_CLR(options, HAS_CHANGES); - diff_queued_diff.run = 1; + options->found_follow = 0; } int diff_result_code(struct diff_options *opt, int status) @@ -4103,6 +4204,24 @@ int diff_result_code(struct diff_options *opt, int status) return result; } +/* + * Shall changes to this submodule be ignored? 
+ * + * Submodule changes can be configured to be ignored separately for each path, + * but that configuration can be overridden from the command line. + */ +static int is_submodule_ignored(const char *path, struct diff_options *options) +{ + int ignored = 0; + unsigned orig_flags = options->flags; + if (!DIFF_OPT_TST(options, OVERRIDE_SUBMODULE_CONFIG)) + set_diffopt_flags_from_submodule_config(options, path); + if (DIFF_OPT_TST(options, IGNORE_SUBMODULES)) + ignored = 1; + options->flags = orig_flags; + return ignored; +} + void diff_addremove(struct diff_options *options, int addremove, unsigned mode, const unsigned char *sha1, @@ -4110,7 +4229,7 @@ void diff_addremove(struct diff_options *options, { struct diff_filespec *one, *two; - if (DIFF_OPT_TST(options, IGNORE_SUBMODULES) && S_ISGITLINK(mode)) + if (S_ISGITLINK(mode) && is_submodule_ignored(concatpath, options)) return; /* This may look odd, but it is a preparation for @@ -4157,8 +4276,8 @@ void diff_change(struct diff_options *options, { struct diff_filespec *one, *two; - if (DIFF_OPT_TST(options, IGNORE_SUBMODULES) && S_ISGITLINK(old_mode) - && S_ISGITLINK(new_mode)) + if (S_ISGITLINK(old_mode) && S_ISGITLINK(new_mode) && + is_submodule_ignored(concatpath, options)) return; if (DIFF_OPT_TST(options, REVERSE_DIFF)) { @@ -77,6 +77,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data) #define DIFF_OPT_DIRTY_SUBMODULES (1 << 24) #define DIFF_OPT_IGNORE_UNTRACKED_IN_SUBMODULES (1 << 25) #define DIFF_OPT_IGNORE_DIRTY_SUBMODULES (1 << 26) +#define DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG (1 << 27) #define DIFF_OPT_TST(opts, flag) ((opts)->flags & DIFF_OPT_##flag) #define DIFF_OPT_SET(opts, flag) ((opts)->flags |= DIFF_OPT_##flag) @@ -126,6 +127,9 @@ struct diff_options { /* this is set by diffcore for DIFF_FORMAT_PATCH */ int found_changes; + /* to support internal diff recursion by --follow hack*/ + int found_follow; + FILE *file; int close_file; @@ -214,6 +218,13 @@ extern void 
diff_unmerge(struct diff_options *, #define DIFF_SETUP_USE_CACHE 2 #define DIFF_SETUP_USE_SIZE_CACHE 4 +/* + * Poor man's alternative to parse-option, to allow both sticked form + * (--option=value) and separate form (--option value). + */ +extern int parse_long_opt(const char *opt, const char **argv, + const char **optarg); + extern int git_diff_basic_config(const char *var, const char *value, void *cb); extern int git_diff_ui_config(const char *var, const char *value, void *cb); extern int diff_use_color_default; diff --git a/diffcore.h b/diffcore.h index 491bea0b44..8b3241ad13 100644 --- a/diffcore.h +++ b/diffcore.h @@ -18,7 +18,7 @@ #define MAX_SCORE 60000.0 #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */ #define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%) */ -#define DEFAULT_MERGE_SCORE 36000 /* maximum for break-merge to happen 60%) */ +#define DEFAULT_MERGE_SCORE 36000 /* maximum for break-merge to happen (60%) */ #define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */ @@ -91,14 +91,12 @@ struct diff_queue_struct { struct diff_filepair **queue; int alloc; int nr; - int run; }; #define DIFF_QUEUE_CLEAR(q) \ do { \ (q)->queue = NULL; \ (q)->nr = (q)->alloc = 0; \ - (q)->run = 0; \ - } while(0); + } while (0) extern struct diff_queue_struct diff_queued_diff; extern struct diff_filepair *diff_queue(struct diff_queue_struct *, @@ -118,9 +116,9 @@ void diff_debug_filespec(struct diff_filespec *, int, const char *); void diff_debug_filepair(const struct diff_filepair *, int); void diff_debug_queue(const char *, struct diff_queue_struct *); #else -#define diff_debug_filespec(a,b,c) do {} while(0) -#define diff_debug_filepair(a,b) do {} while(0) -#define diff_debug_queue(a,b) do {} while(0) +#define diff_debug_filespec(a,b,c) do { /* nothing */ } while (0) +#define diff_debug_filepair(a,b) do { /* nothing */ } while (0) +#define diff_debug_queue(a,b) do { /* nothing */ } while (0) #endif extern int 
diffcore_count_changes(struct diff_filespec *src, diff --git a/environment.c b/environment.c index 83d38d3c23..eeb26876a1 100644 --- a/environment.c +++ b/environment.c @@ -53,6 +53,7 @@ enum object_creation_mode object_creation_mode = OBJECT_CREATION_MODE; char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; +struct startup_info *startup_info; /* Parallel index stat data preload? */ int core_preload_index = 0; diff --git a/fast-import.c b/fast-import.c index 1e5d66ed0a..2317b0fe75 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1528,6 +1528,14 @@ static int tree_content_remove( for (i = 0; i < t->entry_count; i++) { e = t->entries[i]; if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) { + if (slash1 && !S_ISDIR(e->versions[1].mode)) + /* + * If p names a file in some subdirectory, and a + * file or symlink matching the name of the + * parent directory of p exists, then p cannot + * exist and need not be deleted. + */ + return 1; if (!slash1 || !S_ISDIR(e->versions[1].mode)) goto del_entry; if (!e->tree) @@ -1666,7 +1674,7 @@ static void dump_marks_helper(FILE *f, if (m->shift) { for (k = 0; k < 1024; k++) { if (m->data.sets[k]) - dump_marks_helper(f, (base + k) << m->shift, + dump_marks_helper(f, base + (k << m->shift), m->data.sets[k]); } } else { @@ -2131,6 +2139,7 @@ static void file_change_m(struct branch *b) case S_IFREG | 0644: case S_IFREG | 0755: case S_IFLNK: + case S_IFDIR: case S_IFGITLINK: /* ok */ break; @@ -2176,23 +2185,28 @@ static void file_change_m(struct branch *b) * another repository. 
*/ } else if (inline_data) { + if (S_ISDIR(mode)) + die("Directories cannot be specified 'inline': %s", + command_buf.buf); if (p != uq.buf) { strbuf_addstr(&uq, p); p = uq.buf; } read_next_command(); parse_and_store_blob(&last_blob, sha1, 0); - } else if (oe) { - if (oe->type != OBJ_BLOB) - die("Not a blob (actually a %s): %s", - typename(oe->type), command_buf.buf); } else { - enum object_type type = sha1_object_info(sha1, NULL); + enum object_type expected = S_ISDIR(mode) ? + OBJ_TREE: OBJ_BLOB; + enum object_type type = oe ? oe->type : + sha1_object_info(sha1, NULL); if (type < 0) - die("Blob not found: %s", command_buf.buf); - if (type != OBJ_BLOB) - die("Not a blob (actually a %s): %s", - typename(type), command_buf.buf); + die("%s not found: %s", + S_ISDIR(mode) ? "Tree" : "Blob", + command_buf.buf); + if (type != expected) + die("Not a %s (actually a %s): %s", + typename(expected), typename(type), + command_buf.buf); } tree_content_set(&b->branch_tree, p, sha1, mode, NULL); diff --git a/git-compat-util.h b/git-compat-util.h index 02a73eeb66..877096ecb0 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -312,6 +312,11 @@ extern size_t gitstrlcpy(char *, const char *, size_t); extern uintmax_t gitstrtoumax(const char *, char **, int); #endif +#ifdef NO_STRTOK_R +#define strtok_r gitstrtok_r +extern char *gitstrtok_r(char *s, const char *delim, char **save_ptr); +#endif + #ifdef NO_HSTRERROR #define hstrerror githstrerror extern const char *githstrerror(int herror); @@ -388,6 +393,8 @@ extern int odb_pack_keep(char *name, size_t namesz, unsigned char *sha1); static inline size_t xsize_t(off_t len) { + if (len > (size_t) len) + die("Cannot handle files this big"); return (size_t)len; } diff --git a/git-gui/git-gui.sh b/git-gui/git-gui.sh index 7d5451198c..bb104895a9 100755 --- a/git-gui/git-gui.sh +++ b/git-gui/git-gui.sh @@ -38,7 +38,7 @@ if {[catch {package require Tcl 8.4} err] tk_messageBox \ -icon error \ -type ok \ - -title [mc "git-gui: fatal 
error"] \ + -title "git-gui: fatal error" \ -message $err exit 1 } @@ -269,6 +269,17 @@ proc is_config_true {name} { } } +proc is_config_false {name} { + global repo_config + if {[catch {set v $repo_config($name)}]} { + return 0 + } elseif {$v eq {false} || $v eq {0} || $v eq {no}} { + return 1 + } else { + return 0 + } +} + proc get_config {name} { global repo_config if {[catch {set v $repo_config($name)}]} { @@ -323,6 +334,8 @@ proc _trace_exec {cmd} { puts stderr $d } +#'" fix poor old emacs font-lock mode + proc _git_cmd {name} { global _git_cmd_path @@ -416,6 +429,9 @@ proc _lappend_nice {cmd_var} { if {![info exists _nice]} { set _nice [_which nice] + if {[catch {exec $_nice git version}]} { + set _nice {} + } } if {$_nice ne {}} { lappend cmd $_nice @@ -634,6 +650,7 @@ proc rmsel_tag {text} { return $text } +wm withdraw . set root_exists 0 bind . <Visibility> { bind . <Visibility> {} @@ -782,6 +799,7 @@ set default_config(user.email) {} set default_config(gui.encoding) [encoding system] set default_config(gui.matchtrackingbranch) false +set default_config(gui.textconv) true set default_config(gui.pruneduringfetch) false set default_config(gui.trustmtime) false set default_config(gui.fastcopyblame) false @@ -1155,6 +1173,9 @@ apply_config # try to set work tree from environment, falling back to core.worktree if {[catch { set _gitworktree $env(GIT_WORK_TREE) }]} { set _gitworktree [get_config core.worktree] + if {$_gitworktree eq ""} { + set _gitworktree [file dirname [file normalize $_gitdir]] + } } if {$_prefix ne {}} { if {$_gitworktree eq {}} { @@ -2098,7 +2119,7 @@ proc do_explore {} { # freedesktop.org-conforming system is our best shot set explorer "xdg-open" } - eval exec $explorer $_gitworktree & + eval exec $explorer [list [file nativename $_gitworktree]] & } set is_quitting 0 @@ -2901,6 +2922,7 @@ blame { set current_branch $head } + wm deiconify . 
switch -- $subcommand { browser { if {$jump_spec ne {}} usage @@ -3405,6 +3427,19 @@ lappend diff_actions [list $ctxmsm entryconf [$ctxmsm index last] -state] $ctxmsm add separator create_common_diff_popup $ctxmsm +proc has_textconv {path} { + if {[is_config_false gui.textconv]} { + return 0 + } + set filter [gitattr $path diff set] + set textconv [get_config [join [list diff $filter textconv] .]] + if {$filter ne {set} && $textconv ne {}} { + return 1 + } else { + return 0 + } +} + proc popup_diff_menu {ctxm ctxmmg ctxmsm x y X Y} { global current_diff_path file_states set ::cursorX $x @@ -3440,7 +3475,8 @@ proc popup_diff_menu {ctxm ctxmmg ctxmsm x y X Y} { || {__} eq $state || {_O} eq $state || {_T} eq $state - || {T_} eq $state} { + || {T_} eq $state + || [has_textconv $current_diff_path]} { set s disabled } else { set s normal @@ -3460,29 +3496,44 @@ $main_status show [mc "Initializing..."] # -- Load geometry # -catch { -set gm $repo_config(gui.geometry) -wm geometry . [lindex $gm 0] -if {$use_ttk} { - .vpane sashpos 0 [lindex $gm 1] - .vpane.files sashpos 0 [lindex $gm 2] -} else { - .vpane sash place 0 \ - [lindex $gm 1] \ - [lindex [.vpane sash coord 0] 1] - .vpane.files sash place 0 \ - [lindex [.vpane.files sash coord 0] 0] \ - [lindex $gm 2] +proc on_ttk_pane_mapped {w pane pos} { + bind $w <Map> {} + after 0 [list after idle [list $w sashpos $pane $pos]] +} +proc on_tk_pane_mapped {w pane x y} { + bind $w <Map> {} + after 0 [list after idle [list $w sash place $pane $x $y]] +} +proc on_application_mapped {} { + global repo_config use_ttk + bind . 
<Map> {} + set gm $repo_config(gui.geometry) + if {$use_ttk} { + bind .vpane <Map> \ + [list on_ttk_pane_mapped %W 0 [lindex $gm 1]] + bind .vpane.files <Map> \ + [list on_ttk_pane_mapped %W 0 [lindex $gm 2]] + } else { + bind .vpane <Map> \ + [list on_tk_pane_mapped %W 0 \ + [lindex $gm 1] \ + [lindex [.vpane sash coord 0] 1]] + bind .vpane.files <Map> \ + [list on_tk_pane_mapped %W 0 \ + [lindex [.vpane.files sash coord 0] 0] \ + [lindex $gm 2]] + } + wm geometry . [lindex $gm 0] } -unset gm +if {[info exists repo_config(gui.geometry)]} { + bind . <Map> [list on_application_mapped] + wm geometry . [lindex $repo_config(gui.geometry) 0] } # -- Load window state # -catch { -set gws $repo_config(gui.wmstate) -wm state . $gws -unset gws +if {[info exists repo_config(gui.wmstate)]} { + catch {wm state . $repo_config(gui.wmstate)} } # -- Key Bindings diff --git a/git-gui/lib/blame.tcl b/git-gui/lib/blame.tcl index 786b50b8c2..2137ec9684 100644 --- a/git-gui/lib/blame.tcl +++ b/git-gui/lib/blame.tcl @@ -449,11 +449,28 @@ method _load {jump} { $status show [mc "Reading %s..." 
"$commit:[escape_path $path]"] $w_path conf -text [escape_path $path] + + set do_textconv 0 + if {![is_config_false gui.textconv] && [git-version >= 1.7.2]} { + set filter [gitattr $path diff set] + set textconv [get_config [join [list diff $filter textconv] .]] + if {$filter ne {set} && $textconv ne {}} { + set do_textconv 1 + } + } if {$commit eq {}} { - set fd [open $path r] + if {$do_textconv ne 0} { + set fd [open |[list $textconv $path] r] + } else { + set fd [open $path r] + } fconfigure $fd -eofchar {} } else { - set fd [git_read cat-file blob "$commit:$path"] + if {$do_textconv ne 0} { + set fd [git_read cat-file --textconv "$commit:$path"] + } else { + set fd [git_read cat-file blob "$commit:$path"] + } } fconfigure $fd \ -blocking 0 \ diff --git a/git-gui/lib/choose_repository.tcl b/git-gui/lib/choose_repository.tcl index 64f06748b6..fae119286d 100644 --- a/git-gui/lib/choose_repository.tcl +++ b/git-gui/lib/choose_repository.tcl @@ -100,12 +100,17 @@ constructor pick {} { $opts insert end [mc "Clone Existing Repository"] link_clone $opts insert end "\n" if {$m_repo ne {}} { + if {[tk windowingsystem] eq "win32"} { + set key L + } else { + set key C + } $m_repo add command \ -command [cb _next clone] \ - -accelerator $M1T-C \ + -accelerator $M1T-$key \ -label [mc "Clone..."] - bind $top <$M1B-c> [cb _next clone] - bind $top <$M1B-C> [cb _next clone] + bind $top <$M1B-[string tolower $key]> [cb _next clone] + bind $top <$M1B-[string toupper $key]> [cb _next clone] } $opts tag conf link_open -foreground blue -underline 1 diff --git a/git-gui/lib/diff.tcl b/git-gui/lib/diff.tcl index ec8c11eeb7..c628750276 100644 --- a/git-gui/lib/diff.tcl +++ b/git-gui/lib/diff.tcl @@ -55,7 +55,7 @@ proc handle_empty_diff {} { set path $current_diff_path set s $file_states($path) - if {[lindex $s 0] ne {_M}} return + if {[lindex $s 0] ne {_M} || [has_textconv $path]} return # Prevent infinite rescan loops incr diff_empty_count @@ -280,6 +280,9 @@ proc start_show_diff 
{cont_info {add_opts {}}} { lappend cmd diff-files } } + if {![is_config_false gui.textconv] && [git-version >= 1.6.1]} { + lappend cmd --textconv + } if {[string match {160000 *} [lindex $s 2]] || [string match {160000 *} [lindex $s 3]]} { diff --git a/git-gui/lib/option.tcl b/git-gui/lib/option.tcl index d4c5e45c8a..3807c8d283 100644 --- a/git-gui/lib/option.tcl +++ b/git-gui/lib/option.tcl @@ -148,6 +148,7 @@ proc do_options {} { {b gui.trustmtime {mc "Trust File Modification Timestamps"}} {b gui.pruneduringfetch {mc "Prune Tracking Branches During Fetch"}} {b gui.matchtrackingbranch {mc "Match Tracking Branches"}} + {b gui.textconv {mc "Use Textconv For Diffs and Blames"}} {b gui.fastcopyblame {mc "Blame Copy Only On Changed Files"}} {i-20..200 gui.copyblamethreshold {mc "Minimum Letters To Blame Copy On"}} {i-0..300 gui.blamehistoryctx {mc "Blame History Context Radius (days)"}} diff --git a/git-gui/lib/shortcut.tcl b/git-gui/lib/shortcut.tcl index 79c1888e11..78878ef89d 100644 --- a/git-gui/lib/shortcut.tcl +++ b/git-gui/lib/shortcut.tcl @@ -16,7 +16,7 @@ proc do_windows_shortcut {} { [info nameofexecutable] \ [file normalize $::argv0] \ ] \ - [file normalize [$_gitworktree]] + [file normalize $_gitworktree] } err]} { error_popup [strcat [mc "Cannot write shortcut:"] "\n\n$err"] } @@ -57,7 +57,7 @@ proc do_cygwin_shortcut {} { $sh -c \ "CHERE_INVOKING=1 source /etc/profile;[sq $me] &" \ ] \ - [file normalize [$_gitworktree]] + [file normalize $_gitworktree] } err]} { error_popup [strcat [mc "Cannot write shortcut:"] "\n\n$err"] } diff --git a/git-gui/lib/status_bar.tcl b/git-gui/lib/status_bar.tcl index 5fe3aad382..95cb44991f 100644 --- a/git-gui/lib/status_bar.tcl +++ b/git-gui/lib/status_bar.tcl @@ -39,6 +39,7 @@ method _oneline_pack {} { } constructor two_line {path} { + global NS set w $path set w_l $w.l set w_c $w.c diff --git a/git-gui/lib/win32.tcl b/git-gui/lib/win32.tcl index d7f93d045d..db91ab84a5 100644 --- a/git-gui/lib/win32.tcl +++ 
b/git-gui/lib/win32.tcl @@ -18,9 +18,9 @@ proc win32_create_lnk {lnk_path lnk_exec lnk_dir} { eval [list exec wscript.exe \ /E:jscript \ /nologo \ - [file join $oguilib win32_shortcut.js] \ + [file nativename [file join $oguilib win32_shortcut.js]] \ $lnk_path \ - [file join $oguilib git-gui.ico] \ + [file nativename [file join $oguilib git-gui.ico]] \ $lnk_dir \ $lnk_exec] $lnk_args } diff --git a/git-gui/windows/git-gui.sh b/git-gui/windows/git-gui.sh index 66bbb2f8fa..b1845c5055 100644 --- a/git-gui/windows/git-gui.sh +++ b/git-gui/windows/git-gui.sh @@ -13,10 +13,11 @@ if { $argc >=2 && [lindex $argv 0] == "--working-dir" } { incr argc -2 } -set bindir [file dirname \ +set basedir [file dirname \ [file dirname \ [file dirname [info script]]]] -set bindir [file join $bindir bin] +set bindir [file join $basedir bin] +set bindir "$bindir;[file join $basedir mingw bin]" regsub -all ";" $bindir "\\;" bindir set env(PATH) "$bindir;$env(PATH)" unset bindir diff --git a/git-instaweb.sh b/git-instaweb.sh index 6635fbefdf..e6f6ecda17 100755 --- a/git-instaweb.sh +++ b/git-instaweb.sh @@ -43,7 +43,8 @@ test -z "$port" && port=1234 resolve_full_httpd () { case "$httpd" in - *apache2*|*lighttpd*) + *apache2*|*lighttpd*|*httpd*) + # yes, *httpd* covers *lighttpd* above, but it is there for clarity # ensure that the apache2/lighttpd command ends with "-f" if ! 
echo "$httpd" | sane_grep -- '-f *$' >/dev/null 2>&1 then @@ -56,6 +57,13 @@ resolve_full_httpd () { httpd_only="${httpd%% *}" # cut on first space return ;; + *webrick*) + # server is started by running via generated webrick.rb in + # $fqgitdir/gitweb + full_httpd="$fqgitdir/gitweb/webrick.rb" + httpd_only="${httpd%% *}" # cut on first space + return + ;; esac httpd_only="$(echo $httpd | cut -f1 -d' ')" @@ -187,40 +195,53 @@ GITWEB_CONFIG="$fqgitdir/gitweb/gitweb_config.perl" export GIT_EXEC_PATH GIT_DIR GITWEB_CONFIG webrick_conf () { + # webrick seems to have no way of passing arbitrary environment + # variables to the underlying CGI executable, so we wrap the + # actual gitweb.cgi using a shell script to force it + wrapper="$fqgitdir/gitweb/$httpd/wrapper.sh" + cat > "$wrapper" <<EOF +#!/bin/sh +# we use this shell script wrapper around the real gitweb.cgi since +# there appears to be no other way to pass arbitrary environment variables +# into the CGI process +GIT_EXEC_PATH=$GIT_EXEC_PATH GIT_DIR=$GIT_DIR GITWEB_CONFIG=$GITWEB_CONFIG +export GIT_EXEC_PATH GIT_DIR GITWEB_CONFIG +exec $root/gitweb.cgi +EOF + chmod +x "$wrapper" + + # This assumes _ruby_ is in the user's $PATH. that's _one_ + # portable way to run ruby, which could be installed anywhere, really. # generate a standalone server script in $fqgitdir/gitweb. 
cat >"$fqgitdir/gitweb/$httpd.rb" <<EOF +#!/usr/bin/env ruby require 'webrick' -require 'yaml' -options = YAML::load_file(ARGV[0]) -options[:StartCallback] = proc do - File.open(options[:PidFile],"w") do |f| - f.puts Process.pid - end -end -options[:ServerType] = WEBrick::Daemon +require 'logger' +options = { + :Port => $port, + :DocumentRoot => "$root", + :Logger => Logger.new('$fqgitdir/gitweb/error.log'), + :AccessLog => [ + [ Logger.new('$fqgitdir/gitweb/access.log'), + WEBrick::AccessLog::COMBINED_LOG_FORMAT ] + ], + :DirectoryIndex => ["gitweb.cgi"], + :CGIInterpreter => "$wrapper", + :StartCallback => lambda do + File.open("$fqgitdir/pid", "w") { |f| f.puts Process.pid } + end, + :ServerType => WEBrick::Daemon, +} +options[:BindAddress] = '127.0.0.1' if "$local" == "true" server = WEBrick::HTTPServer.new(options) ['INT', 'TERM'].each do |signal| trap(signal) {server.shutdown} end server.start EOF - # generate a shell script to invoke the above ruby script, - # which assumes _ruby_ is in the user's $PATH. that's _one_ - # portable way to run ruby, which could be installed anywhere, - # really. 
- cat >"$fqgitdir/gitweb/$httpd" <<EOF -#!/bin/sh -exec ruby "$fqgitdir/gitweb/$httpd.rb" \$* -EOF - chmod +x "$fqgitdir/gitweb/$httpd" - - cat >"$conf" <<EOF -:Port: $port -:DocumentRoot: "$root" -:DirectoryIndex: ["gitweb.cgi"] -:PidFile: "$fqgitdir/pid" -EOF - test "$local" = true && echo ':BindAddress: "127.0.0.1"' >> "$conf" + chmod +x "$fqgitdir/gitweb/$httpd.rb" + # configuration is embedded in server script file, webrick.rb + rm -f "$conf" } lighttpd_conf () { @@ -300,7 +321,13 @@ EOF } apache2_conf () { - test -z "$module_path" && module_path=/usr/lib/apache2/modules + if test -z "$module_path" + then + test -d "/usr/lib/httpd/modules" && + module_path="/usr/lib/httpd/modules" + test -d "/usr/lib/apache2/modules" && + module_path="/usr/lib/apache2/modules" + fi bind= test x"$local" = xtrue && bind='127.0.0.1:' echo 'text/css css' > "$fqgitdir/mime.types" @@ -314,8 +341,10 @@ PidFile "$fqgitdir/pid" Listen $bind$port EOF - for mod in mime dir; do - if test -e $module_path/mod_${mod}.so; then + for mod in mime dir env log_config + do + if test -e $module_path/mod_${mod}.so + then echo "LoadModule ${mod}_module " \ "$module_path/mod_${mod}.so" >> "$conf" fi @@ -334,7 +363,7 @@ EOF cat >> "$conf" <<EOF LoadModule perl_module $module_path/mod_perl.so PerlPassEnv GIT_DIR -PerlPassEnv GIT_EXEC_DIR +PerlPassEnv GIT_EXEC_PATH PerlPassEnv GITWEB_CONFIG <Location /gitweb.cgi> SetHandler perl-script @@ -364,6 +393,9 @@ EOF echo "ScriptSock logs/gitweb.sock" >> "$conf" fi cat >> "$conf" <<EOF +PassEnv GIT_DIR +PassEnv GIT_EXEC_PATH +PassEnv GITWEB_CONFIG AddHandler cgi-script .cgi <Location /gitweb.cgi> Options +ExecCGI @@ -560,7 +592,7 @@ case "$httpd" in *lighttpd*) lighttpd_conf ;; -*apache2*) +*apache2*|*httpd*) apache2_conf ;; webrick) diff --git a/git-mergetool--lib.sh b/git-mergetool--lib.sh index 51dd0d67ba..b5e1943b1d 100644 --- a/git-mergetool--lib.sh +++ b/git-mergetool--lib.sh @@ -35,7 +35,7 @@ check_unchanged () { while true; do echo "$MERGED seems 
unchanged." printf "Was the merge successful? [y/n] " - read answer < /dev/tty + read answer case "$answer" in y*|Y*) status=0; break ;; n*|N*) status=1; break ;; diff --git a/git-mergetool.sh b/git-mergetool.sh index b52a7410bc..2f8dc441c6 100755 --- a/git-mergetool.sh +++ b/git-mergetool.sh @@ -264,24 +264,46 @@ merge_keep_temporaries="$(git config --bool mergetool.keepTemporaries || echo fa last_status=0 rollup_status=0 +rerere=false + +files_to_merge() { + if test "$rerere" = true + then + git rerere status + else + git ls-files -u | sed -e 's/^[^ ]* //' | sort -u + fi +} + if test $# -eq 0 ; then - files=$(git ls-files -u | sed -e 's/^[^ ]* //' | sort -u) + cd_to_toplevel + + if test -e "$GIT_DIR/MERGE_RR" + then + rerere=true + fi + + files=$(files_to_merge) if test -z "$files" ; then echo "No files need merging" exit 0 fi - echo Merging the files: "$files" - git ls-files -u | - sed -e 's/^[^ ]* //' | - sort -u | + + # Save original stdin + exec 3<&0 + + printf "Merging:\n" + printf "$files\n" + + files_to_merge | while IFS= read i do if test $last_status -ne 0; then - prompt_after_failed_merge < /dev/tty || exit 1 + prompt_after_failed_merge <&3 || exit 1 fi printf "\n" - merge_file "$i" < /dev/tty > /dev/tty + merge_file "$i" <&3 last_status=$? 
if test $last_status -ne 0; then rollup_status=1 diff --git a/git-pull.sh b/git-pull.sh index a09a44ec4c..8eb74d45de 100755 --- a/git-pull.sh +++ b/git-pull.sh @@ -273,6 +273,15 @@ then exit fi +if test true = "$rebase" +then + o=$(git show-branch --merge-base $curr_branch $merge_head $oldremoteref) + if test "$oldremoteref" = "$o" + then + unset oldremoteref + fi +fi + merge_name=$(git fmt-merge-msg $log_arg <"$GIT_DIR/FETCH_HEAD") || exit case "$rebase" in true) diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh index 31e68603f4..eb2dff55f8 100755 --- a/git-rebase--interactive.sh +++ b/git-rebase--interactive.sh @@ -111,15 +111,16 @@ VERBOSE= OK_TO_SKIP_PRE_REBASE= REBASE_ROOT= AUTOSQUASH= +test "$(git config --bool rebase.autosquash)" = "true" && AUTOSQUASH=t NEVER_FF= -GIT_CHERRY_PICK_HELP=" After resolving the conflicts, -mark the corrected paths with 'git add <paths>', and -run 'git rebase --continue'" +GIT_CHERRY_PICK_HELP="\ +hint: after resolving the conflicts, mark the corrected paths +hint: with 'git add <paths>' and run 'git rebase --continue'" export GIT_CHERRY_PICK_HELP warn () { - echo "$*" >&2 + printf '%s\n' "$*" >&2 } output () { @@ -537,6 +538,34 @@ do_next () { esac record_in_rewritten $sha1 ;; + x|"exec") + read -r command rest < "$TODO" + mark_action_done + printf 'Executing: %s\n' "$rest" + # "exec" command doesn't take a sha1 in the todo-list. + # => can't just use $sha1 here. + git rev-parse --verify HEAD > "$DOTEST"/stopped-sha + ${SHELL:-@SHELL_PATH@} -c "$rest" # Actual execution + status=$? + if test "$status" -ne 0 + then + warn "Execution failed: $rest" + warn "You can fix the problem, and then run" + warn + warn " git rebase --continue" + warn + exit "$status" + fi + # Run in subshell because require_clean_work_tree can die. + if ! 
(require_clean_work_tree) + then + warn "Commit or stash your changes, and then run" + warn + warn " git rebase --continue" + warn + exit 1 + fi + ;; *) warn "Unknown command: $command $sha1 $rest" if git rev-parse --verify -q "$sha1" >/dev/null @@ -591,22 +620,30 @@ do_rest () { # skip picking commits whose parents are unchanged skip_unnecessary_picks () { fd=3 - while read -r command sha1 rest + while read -r command rest do # fd=3 means we skip the command - case "$fd,$command,$(git rev-parse --verify --quiet $sha1^)" in - 3,pick,"$ONTO"*|3,p,"$ONTO"*) + case "$fd,$command" in + 3,pick|3,p) # pick a commit whose parent is current $ONTO -> skip - ONTO=$sha1 + sha1=$(printf '%s' "$rest" | cut -d ' ' -f 1) + case "$(git rev-parse --verify --quiet "$sha1"^)" in + "$ONTO"*) + ONTO=$sha1 + ;; + *) + fd=1 + ;; + esac ;; - 3,#*|3,,*) + 3,#*|3,) # copy comments ;; *) fd=1 ;; esac - echo "$command${sha1:+ }$sha1${rest:+ }$rest" >&$fd + printf '%s\n' "$command${rest:+ }$rest" >&$fd done <"$TODO" >"$TODO.new" 3>>"$DONE" && mv -f "$TODO".new "$TODO" && case "$(peek_next_command)" in @@ -649,12 +686,12 @@ rearrange_squash () { case " $used" in *" $sha1 "*) continue ;; esac - echo "$pick $sha1 $message" + printf '%s\n' "$pick $sha1 $message" while read -r squash action msg do case "$message" in "$msg"*) - echo "$action $squash $action! $msg" + printf '%s\n' "$action $squash $action! $msg" used="$used$squash " ;; esac @@ -795,6 +832,9 @@ first and then run 'git rebase --continue' again." --autosquash) AUTOSQUASH=t ;; + --no-autosquash) + AUTOSQUASH= + ;; --onto) shift ONTO=$(parse_onto "$1") || @@ -895,7 +935,7 @@ first and then run 'git rebase --continue' again." do if test t != "$PRESERVE_MERGES" then - echo "pick $shortsha1 $rest" >> "$TODO" + printf '%s\n' "pick $shortsha1 $rest" >> "$TODO" else sha1=$(git rev-parse $shortsha1) if test -z "$REBASE_ROOT" @@ -914,7 +954,7 @@ first and then run 'git rebase --continue' again." 
if test f = "$preserve" then touch "$REWRITTEN"/$sha1 - echo "pick $shortsha1 $rest" >> "$TODO" + printf '%s\n' "pick $shortsha1 $rest" >> "$TODO" fi fi done @@ -957,6 +997,7 @@ first and then run 'git rebase --continue' again." # e, edit = use commit, but stop for amending # s, squash = use commit, but meld into previous commit # f, fixup = like "squash", but discard this commit's log message +# x <cmd>, exec <cmd> = Run a shell command <cmd>, and stop if it fails # # If you remove a line here THAT COMMIT WILL BE LOST. # However, if you remove everything, the rebase will be aborted. diff --git a/git-rebase.sh b/git-rebase.sh index ab4afa7dee..7508463b30 100755 --- a/git-rebase.sh +++ b/git-rebase.sh @@ -44,6 +44,7 @@ To restore the original branch and stop rebasing run \"git rebase --abort\". " unset newbase strategy=recursive +strategy_opts= do_merge= dotest="$GIT_DIR"/rebase-merge prec=4 @@ -112,7 +113,7 @@ call_merge () { then export GIT_MERGE_VERBOSITY=1 fi - git-merge-$strategy "$cmt^" -- "$hd" "$cmt" + eval 'git-merge-$strategy' $strategy_opts '"$cmt^" -- "$hd" "$cmt"' rv=$? case "$rv" in 0) @@ -208,6 +209,7 @@ do test -d "$dotest" -o -d "$GIT_DIR"/rebase-apply || die "No rebase in progress?" 
+ git update-index --ignore-submodules --refresh && git diff-files --quiet --ignore-submodules || { echo "You must edit all merge conflicts and then" echo "mark them as resolved using git add" @@ -293,6 +295,27 @@ do -M|-m|--m|--me|--mer|--merg|--merge) do_merge=t ;; + -X*|--strategy-option*) + case "$#,$1" in + 1,-X|1,--strategy-option) + usage ;; + *,-X|*,--strategy-option) + newopt="$2" + shift ;; + *,--strategy-option=*) + newopt="$(expr " $1" : ' --strategy-option=\(.*\)')" ;; + *,-X*) + newopt="$(expr " $1" : ' -X\(.*\)')" ;; + 1,*) + usage ;; + esac + strategy_opts="$strategy_opts $(git rev-parse --sq-quote "--$newopt")" + do_merge=t + if test -n "$strategy" + then + strategy=recursive + fi + ;; -s=*|--s=*|--st=*|--str=*|--stra=*|--strat=*|--strate=*|\ --strateg=*|--strategy=*|\ -s|--s|--st|--str|--stra|--strat|--strate|--strateg|--strategy) @@ -345,7 +368,7 @@ do --root) rebase_root=t ;; - -f|--f|--fo|--for|--forc|force|--force-r|--force-re|--force-reb|--force-reba|--force-rebas|--force-rebase|--no-ff) + -f|--f|--fo|--for|--forc|--force|--force-r|--force-re|--force-reb|--force-reba|--force-rebas|--force-rebase|--no-ff) force_rebase=t ;; --rerere-autoupdate|--no-rerere-autoupdate) @@ -543,7 +566,7 @@ fi if test -z "$do_merge" then git format-patch -k --stdout --full-index --ignore-if-in-upstream \ - $root_flag "$revisions" | + --no-renames $root_flag "$revisions" | git am $git_am_opt --rebasing --resolvemsg="$RESOLVEMSG" && move_to_original_branch ret=$? 
diff --git a/git-submodule.sh b/git-submodule.sh index 170186f494..9ebbab798d 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -839,10 +839,11 @@ cmd_sync() if test -e "$path"/.git then ( + say "Synchronizing submodule url for '$name'" + git config submodule."$name".url "$url" clear_local_git_env cd "$path" remote=$(get_default_remote) - say "Synchronizing submodule url for '$name'" git config remote."$remote".url "$url" ) fi diff --git a/git-svn.perl b/git-svn.perl index c4163584a9..9b046b693f 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -494,6 +494,7 @@ sub cmd_set_tree { sub cmd_dcommit { my $head = shift; + command_noisy(qw/update-index --refresh/); git_cmd_try { command_oneline(qw/diff-index --quiet HEAD/) } 'Cannot dcommit with a dirty index. Commit your changes first, ' . "or stash them with `git stash'.\n"; @@ -1819,6 +1820,7 @@ sub read_all_remotes { die("svn-remote.$remote: remote ref '$remote_ref' " . "must start with 'refs/'\n") unless $remote_ref =~ m{^refs/}; + $local_ref = uri_decode($local_ref); $r->{$remote}->{fetch}->{$local_ref} = $remote_ref; $r->{$remote}->{svm} = {} if $use_svm_props; } elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) { @@ -1831,6 +1833,7 @@ sub read_all_remotes { die("svn-remote.$remote: remote ref '$remote_ref' ($t) " . 
"must start with 'refs/'\n") unless $remote_ref =~ m{^refs/}; + $local_ref = uri_decode($local_ref); my $rs = { t => $t, remote => $remote, @@ -2956,18 +2959,29 @@ sub other_gs { my $gs = Git::SVN->find_by_url($new_url, $url, $branch_from); unless ($gs) { my $ref_id = $old_ref_id; - $ref_id =~ s/\@\d+$//; + $ref_id =~ s/\@\d+-*$//; $ref_id .= "\@$r"; # just grow a tail if we're not unique enough :x $ref_id .= '-' while find_ref($ref_id); - print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1; my ($u, $p, $repo_id) = ($new_url, '', $ref_id); if ($u =~ s#^\Q$url\E(/|$)##) { $p = $u; $u = $url; $repo_id = $self->{repo_id}; } - $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1); + while (1) { + # It is possible to tag two different subdirectories at + # the same revision. If the url for an existing ref + # does not match, we must either find a ref with a + # matching url or create a new ref by growing a tail. + $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1); + my (undef, $max_commit) = $gs->rev_map_max(1); + last if (!$max_commit); + my ($url) = ::cmt_metadata($max_commit); + last if ($url eq $gs->full_url); + $ref_id .= '-'; + } + print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1; } $gs } @@ -4050,6 +4064,7 @@ sub new { $self->{absent_dir} = {}; $self->{absent_file} = {}; $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new }); + $self->{pathnameencoding} = Git::config('svn.pathnameencoding'); $self; } @@ -4133,6 +4148,10 @@ sub open_directory { sub git_path { my ($self, $path) = @_; + if (my $enc = $self->{pathnameencoding}) { + require Encode; + Encode::from_to($path, 'UTF-8', $enc); + } if ($self->{path_strip}) { $path =~ s!$self->{path_strip}!! 
or die "Failed to strip path '$path' ($self->{path_strip})\n"; @@ -4521,6 +4540,10 @@ sub split_path { sub repo_path { my ($self, $path) = @_; + if (my $enc = $self->{pathnameencoding}) { + require Encode; + Encode::from_to($path, $enc, 'UTF-8'); + } $self->{path_prefix}.(defined $path ? $path : ''); } diff --git a/git-web--browse.sh b/git-web--browse.sh index dbded76aaf..3fc4166b25 100755 --- a/git-web--browse.sh +++ b/git-web--browse.sh @@ -31,7 +31,7 @@ valid_custom_tool() valid_tool() { case "$1" in - firefox | iceweasel | chrome | chromium | konqueror | w3m | links | lynx | dillo | open | start) + firefox | iceweasel | chrome | google-chrome | chromium | konqueror | w3m | links | lynx | dillo | open | start) ;; # happy *) valid_custom_tool "$1" || return 1 @@ -103,7 +103,7 @@ fi if test -z "$browser" ; then if test -n "$DISPLAY"; then - browser_candidates="firefox iceweasel chrome chromium konqueror w3m links lynx dillo" + browser_candidates="firefox iceweasel google-chrome chrome chromium konqueror w3m links lynx dillo" if test "$KDE_FULL_SESSION" = "true"; then browser_candidates="konqueror $browser_candidates" fi @@ -146,7 +146,7 @@ case "$browser" in test "$vers" -lt 2 && NEWTAB='' "$browser_path" $NEWTAB "$@" & ;; - chrome|chromium) + google-chrome|chrome|chromium) # Actual command for chromium is chromium-browser. # No need to specify newTab. 
It's default in chromium eval "$browser_path" "$@" & @@ -8,12 +8,13 @@ const char git_usage_string[] = "git [--version] [--exec-path[=GIT_EXEC_PATH]] [--html-path]\n" " [-p|--paginate|--no-pager] [--no-replace-objects]\n" " [--bare] [--git-dir=GIT_DIR] [--work-tree=GIT_WORK_TREE]\n" - " [-c name=value\n" - " [--help] COMMAND [ARGS]"; + " [-c name=value] [--help]\n" + " COMMAND [ARGS]"; const char git_more_info_string[] = "See 'git help COMMAND' for more information on a specific command."; +static struct startup_info git_startup_info; static int use_pager = -1; struct pager_config { const char *cmd; @@ -188,7 +189,8 @@ static int handle_alias(int *argcp, const char ***argv) } count = split_cmdline(alias_string, &new_argv); if (count < 0) - die("Bad alias.%s string", alias_command); + die("Bad alias.%s string: %s", alias_command, + split_cmdline_strerror(count)); option_count = handle_options(&new_argv, &count, &envchanged); if (envchanged) die("alias '%s' changes environment variables\n" @@ -229,13 +231,14 @@ static int handle_alias(int *argcp, const char ***argv) const char git_version_string[] = GIT_VERSION; -#define RUN_SETUP (1<<0) -#define USE_PAGER (1<<1) +#define RUN_SETUP (1<<0) +#define RUN_SETUP_GENTLY (1<<1) +#define USE_PAGER (1<<2) /* * require working tree to be present -- anything uses this needs * RUN_SETUP for reading from the configuration file. 
*/ -#define NEED_WORK_TREE (1<<2) +#define NEED_WORK_TREE (1<<3) struct cmd_struct { const char *cmd; @@ -254,8 +257,12 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) if (!help) { if (p->option & RUN_SETUP) prefix = setup_git_directory(); + if (p->option & RUN_SETUP_GENTLY) { + int nongit_ok; + prefix = setup_git_directory_gently(&nongit_ok); + } - if (use_pager == -1 && p->option & RUN_SETUP) + if (use_pager == -1 && p->option & (RUN_SETUP | RUN_SETUP_GENTLY)) use_pager = check_pager_config(p->cmd); if (use_pager == -1 && p->option & USE_PAGER) use_pager = 1; @@ -295,12 +302,12 @@ static void handle_internal_command(int argc, const char **argv) { "add", cmd_add, RUN_SETUP | NEED_WORK_TREE }, { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, { "annotate", cmd_annotate, RUN_SETUP }, - { "apply", cmd_apply }, + { "apply", cmd_apply, RUN_SETUP_GENTLY }, { "archive", cmd_archive }, { "bisect--helper", cmd_bisect__helper, RUN_SETUP | NEED_WORK_TREE }, { "blame", cmd_blame, RUN_SETUP }, { "branch", cmd_branch, RUN_SETUP }, - { "bundle", cmd_bundle }, + { "bundle", cmd_bundle, RUN_SETUP_GENTLY }, { "cat-file", cmd_cat_file, RUN_SETUP }, { "checkout", cmd_checkout, RUN_SETUP | NEED_WORK_TREE }, { "checkout-index", cmd_checkout_index, @@ -313,7 +320,7 @@ static void handle_internal_command(int argc, const char **argv) { "clean", cmd_clean, RUN_SETUP | NEED_WORK_TREE }, { "commit", cmd_commit, RUN_SETUP | NEED_WORK_TREE }, { "commit-tree", cmd_commit_tree, RUN_SETUP }, - { "config", cmd_config }, + { "config", cmd_config, RUN_SETUP_GENTLY }, { "count-objects", cmd_count_objects, RUN_SETUP }, { "describe", cmd_describe, RUN_SETUP }, { "diff", cmd_diff }, @@ -330,21 +337,21 @@ static void handle_internal_command(int argc, const char **argv) { "fsck-objects", cmd_fsck, RUN_SETUP }, { "gc", cmd_gc, RUN_SETUP }, { "get-tar-commit-id", cmd_get_tar_commit_id }, - { "grep", cmd_grep }, + { "grep", cmd_grep, RUN_SETUP_GENTLY }, { "hash-object", 
cmd_hash_object }, { "help", cmd_help }, - { "index-pack", cmd_index_pack }, + { "index-pack", cmd_index_pack, RUN_SETUP_GENTLY }, { "init", cmd_init_db }, { "init-db", cmd_init_db }, - { "log", cmd_log, RUN_SETUP | USE_PAGER }, + { "log", cmd_log, RUN_SETUP }, { "ls-files", cmd_ls_files, RUN_SETUP }, { "ls-tree", cmd_ls_tree, RUN_SETUP }, - { "ls-remote", cmd_ls_remote }, + { "ls-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, { "mailinfo", cmd_mailinfo }, { "mailsplit", cmd_mailsplit }, { "merge", cmd_merge, RUN_SETUP | NEED_WORK_TREE }, { "merge-base", cmd_merge_base, RUN_SETUP }, - { "merge-file", cmd_merge_file }, + { "merge-file", cmd_merge_file, RUN_SETUP_GENTLY }, { "merge-index", cmd_merge_index, RUN_SETUP }, { "merge-ours", cmd_merge_ours, RUN_SETUP }, { "merge-recursive", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE }, @@ -360,7 +367,7 @@ static void handle_internal_command(int argc, const char **argv) { "pack-objects", cmd_pack_objects, RUN_SETUP }, { "pack-redundant", cmd_pack_redundant, RUN_SETUP }, { "patch-id", cmd_patch_id }, - { "peek-remote", cmd_ls_remote }, + { "peek-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, { "pickaxe", cmd_blame, RUN_SETUP }, { "prune", cmd_prune, RUN_SETUP }, { "prune-packed", cmd_prune_packed, RUN_SETUP }, @@ -370,7 +377,7 @@ static void handle_internal_command(int argc, const char **argv) { "reflog", cmd_reflog, RUN_SETUP }, { "remote", cmd_remote, RUN_SETUP }, { "replace", cmd_replace, RUN_SETUP }, - { "repo-config", cmd_config }, + { "repo-config", cmd_config, RUN_SETUP_GENTLY }, { "rerere", cmd_rerere, RUN_SETUP }, { "reset", cmd_reset, RUN_SETUP }, { "rev-list", cmd_rev_list, RUN_SETUP }, @@ -378,9 +385,9 @@ static void handle_internal_command(int argc, const char **argv) { "revert", cmd_revert, RUN_SETUP | NEED_WORK_TREE }, { "rm", cmd_rm, RUN_SETUP }, { "send-pack", cmd_send_pack, RUN_SETUP }, - { "shortlog", cmd_shortlog, USE_PAGER }, + { "shortlog", cmd_shortlog, RUN_SETUP_GENTLY | USE_PAGER }, { "show-branch", 
cmd_show_branch, RUN_SETUP }, - { "show", cmd_show, RUN_SETUP | USE_PAGER }, + { "show", cmd_show, RUN_SETUP }, { "status", cmd_status, RUN_SETUP | NEED_WORK_TREE }, { "stripspace", cmd_stripspace }, { "symbolic-ref", cmd_symbolic_ref, RUN_SETUP }, @@ -392,10 +399,10 @@ static void handle_internal_command(int argc, const char **argv) { "update-ref", cmd_update_ref, RUN_SETUP }, { "update-server-info", cmd_update_server_info, RUN_SETUP }, { "upload-archive", cmd_upload_archive }, - { "var", cmd_var }, + { "var", cmd_var, RUN_SETUP_GENTLY }, { "verify-tag", cmd_verify_tag, RUN_SETUP }, { "version", cmd_version }, - { "whatchanged", cmd_whatchanged, RUN_SETUP | USE_PAGER }, + { "whatchanged", cmd_whatchanged, RUN_SETUP }, { "write-tree", cmd_write_tree, RUN_SETUP }, { "verify-pack", cmd_verify_pack }, { "show-ref", cmd_show_ref, RUN_SETUP }, @@ -489,6 +496,8 @@ int main(int argc, const char **argv) { const char *cmd; + startup_info = &git_startup_info; + cmd = git_extract_argv0_path(argv[0]); if (!cmd) cmd = "git-help"; diff --git a/gitweb/README b/gitweb/README index 0e19be8d21..d481198796 100644 --- a/gitweb/README +++ b/gitweb/README @@ -95,7 +95,7 @@ You can specify the following configuration variables when building GIT: in the browser's URL bar and next to site name in bookmarks). Relative to base URI of gitweb. [Default: static/git-favicon.png] * GITWEB_JS - Points to the localtion where you put gitweb.js on your web server + Points to the location where you put gitweb.js on your web server (or to be more generic URI of JavaScript code used by gitweb). Relative to base URI of gitweb. [Default: static/gitweb.js (or static/gitweb.min.js if JSMIN build variable is defined / JavaScript @@ -233,7 +233,7 @@ not include variables usually directly set during build): is false. * $maxload Used to set the maximum load that we will still respond to gitweb queries. - If server load exceed this value then return "503 Service Unavaliable" error. 
+ If server load exceed this value then return "503 Service Unavailable" error. Server load is taken to be 0 if gitweb cannot determine its value. Set it to undefined value to turn it off. The default is 300. diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index cedc357313..a85e2f6319 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -232,6 +232,29 @@ our %avatar_size = ( # Leave it undefined (or set to 'undef') to turn off load checking. our $maxload = 300; +# configuration for 'highlight' (http://www.andre-simon.de/) +# match by basename +our %highlight_basename = ( + #'Program' => 'py', + #'Library' => 'py', + 'SConstruct' => 'py', # SCons equivalent of Makefile + 'Makefile' => 'make', +); +# match by extension +our %highlight_ext = ( + # main extensions, defining name of syntax; + # see files in /usr/share/highlight/langDefs/ directory + map { $_ => $_ } + qw(py c cpp rb java css php sh pl js tex bib xml awk bat ini spec tcl), + # alternate extensions, see /etc/highlight/filetypes.conf + 'h' => 'c', + map { $_ => 'cpp' } qw(cxx c++ cc), + map { $_ => 'php' } qw(php3 php4), + map { $_ => 'pl' } qw(perl pm), # perhaps also 'cgi' + 'mak' => 'make', + map { $_ => 'xml' } qw(xhtml html htm), +); + # You define site-wide feature defaults here; override them with # $GITWEB_CONFIG as necessary. 
our %feature = ( @@ -245,7 +268,7 @@ our %feature = ( # return value of feature-sub indicates if to enable specified feature # # if there is no 'sub' key (no feature-sub), then feature cannot be - # overriden + # overridden # # use gitweb_get_feature(<feature>) to retrieve the <feature> value # (an array) or gitweb_check_feature(<feature>) to check if <feature> @@ -1037,8 +1060,12 @@ sub run_request { reset_timer(); evaluate_uri(); + evaluate_gitweb_config(); check_loadavg(); + # $projectroot and $projects_list might be set in gitweb config file + $projects_list ||= $projectroot; + evaluate_query_params(); evaluate_path_info(); evaluate_and_validate_params(); @@ -1086,12 +1113,8 @@ sub evaluate_argv { sub run { evaluate_argv(); - evaluate_gitweb_config(); evaluate_git_version(); - # $projectroot and $projects_list might be set in gitweb config file - $projects_list ||= $projectroot; - $pre_listen_hook->() if $pre_listen_hook; @@ -1102,7 +1125,7 @@ sub run { run_request(); - $pre_dispatch_hook->() + $post_dispatch_hook->() if $post_dispatch_hook; last REQUEST if ($is_last_request->()); @@ -1323,7 +1346,7 @@ sub esc_param { return $str; } -# quote unsafe chars in whole URL, so some charactrs cannot be quoted +# quote unsafe chars in whole URL, so some characters cannot be quoted sub esc_url { my $str = shift; return undef unless defined $str; @@ -3316,30 +3339,6 @@ sub blob_contenttype { sub guess_file_syntax { my ($highlight, $mimetype, $file_name) = @_; return undef unless ($highlight && defined $file_name); - - # configuration for 'highlight' (http://www.andre-simon.de/) - # match by basename - my %highlight_basename = ( - #'Program' => 'py', - #'Library' => 'py', - 'SConstruct' => 'py', # SCons equivalent of Makefile - 'Makefile' => 'make', - ); - # match by extension - my %highlight_ext = ( - # main extensions, defining name of syntax; - # see files in /usr/share/highlight/langDefs/ directory - map { $_ => $_ } - qw(py c cpp rb java css php sh pl js tex bib xml 
awk bat ini spec tcl), - # alternate extensions, see /etc/highlight/filetypes.conf - 'h' => 'c', - map { $_ => 'cpp' } qw(cxx c++ cc), - map { $_ => 'php' } qw(php3 php4), - map { $_ => 'pl' } qw(perl pm), # perhaps also 'cgi' - 'mak' => 'make', - map { $_ => 'xml' } qw(xhtml html htm), - ); - my $basename = basename($file_name, '.in'); return $highlight_basename{$basename} if exists $highlight_basename{$basename}; @@ -3782,9 +3781,9 @@ sub git_print_authorship { } # Outputs table rows containing the full author or committer information, -# in the format expected for 'commit' view (& similia). +# in the format expected for 'commit' view (& similar). # Parameters are a commit hash reference, followed by the list of people -# to output information for. If the list is empty it defalts to both +# to output information for. If the list is empty it defaults to both # author and committer. sub git_print_authorship_rows { my $co = shift; @@ -4513,8 +4512,8 @@ sub git_patchset_body { print "</div>\n"; # class="patch" } - # for compact combined (--cc) format, with chunk and patch simpliciaction - # patchset might be empty, but there might be unprocessed raw lines + # for compact combined (--cc) format, with chunk and patch simplification + # the patchset might be empty, but there might be unprocessed raw lines for (++$patch_idx if $patch_number > 0; $patch_idx < @$difftree; ++$patch_idx) { @@ -5192,15 +5191,15 @@ sub git_summary { } sub git_tag { - my $head = git_get_head_hash($project); - git_header_html(); - git_print_page_nav('','', $head,undef,$head); my %tag = parse_tag($hash); if (! %tag) { die_error(404, "Unknown tag object"); } + my $head = git_get_head_hash($project); + git_header_html(); + git_print_page_nav('','', $head,undef,$head); git_print_header_div('commit', esc_html($tag{'name'}), $hash); print "<div class=\"title_text\">\n" . "<table class=\"object_header\">\n" . 
@@ -6522,12 +6521,13 @@ sub git_search { $paging_nav .= " ⋅ next"; } - if ($#commitlist >= 100) { - } - git_print_page_nav('','', $hash,$co{'tree'},$hash, $paging_nav); git_print_header_div('commit', esc_html($co{'title'}), $hash); - git_search_grep_body(\@commitlist, 0, 99, $next_link); + if ($page == 0 && !@commitlist) { + print "<p>No match.</p>\n"; + } else { + git_search_grep_body(\@commitlist, 0, 99, $next_link); + } } if ($searchtype eq 'pickaxe') { @@ -8,17 +8,6 @@ /* Internal API */ /* - * Output the next line for a graph. - * This formats the next graph line into the specified strbuf. It is not - * terminated with a newline. - * - * Returns 1 if the line includes the current commit, and 0 otherwise. - * graph_next_line() will return 1 exactly once for each time - * graph_update() is called. - */ -static int graph_next_line(struct git_graph *graph, struct strbuf *sb); - -/* * Output a padding line in the graph. * This is similar to graph_next_line(). However, it is guaranteed to * never print the current commit line. Instead, if the commit line is @@ -73,7 +62,7 @@ enum graph_state { /* * The list of available column colors. 
*/ -static char column_colors[][COLOR_MAXLEN] = { +static const char *column_colors_ansi[] = { GIT_COLOR_RED, GIT_COLOR_GREEN, GIT_COLOR_YELLOW, @@ -86,23 +75,33 @@ static char column_colors[][COLOR_MAXLEN] = { GIT_COLOR_BOLD_BLUE, GIT_COLOR_BOLD_MAGENTA, GIT_COLOR_BOLD_CYAN, + GIT_COLOR_RESET, }; -#define COLUMN_COLORS_MAX (ARRAY_SIZE(column_colors)) +#define COLUMN_COLORS_ANSI_MAX (ARRAY_SIZE(column_colors_ansi) - 1) + +static const char **column_colors; +static unsigned short column_colors_max; -static const char *column_get_color_code(const struct column *c) +void graph_set_column_colors(const char **colors, unsigned short colors_max) { - return column_colors[c->color]; + column_colors = colors; + column_colors_max = colors_max; +} + +static const char *column_get_color_code(unsigned short color) +{ + return column_colors[color]; } static void strbuf_write_column(struct strbuf *sb, const struct column *c, char col_char) { - if (c->color < COLUMN_COLORS_MAX) - strbuf_addstr(sb, column_get_color_code(c)); + if (c->color < column_colors_max) + strbuf_addstr(sb, column_get_color_code(c->color)); strbuf_addch(sb, col_char); - if (c->color < COLUMN_COLORS_MAX) - strbuf_addstr(sb, GIT_COLOR_RESET); + if (c->color < column_colors_max) + strbuf_addstr(sb, column_get_color_code(column_colors_max)); } struct git_graph { @@ -226,6 +225,11 @@ static struct strbuf *diff_output_prefix_callback(struct diff_options *opt, void struct git_graph *graph_init(struct rev_info *opt) { struct git_graph *graph = xmalloc(sizeof(struct git_graph)); + + if (!column_colors) + graph_set_column_colors(column_colors_ansi, + COLUMN_COLORS_ANSI_MAX); + graph->commit = NULL; graph->revs = opt; graph->num_parents = 0; @@ -242,7 +246,7 @@ struct git_graph *graph_init(struct rev_info *opt) * always increment it for the first commit we output. * This way we start at 0 for the first commit. 
*/ - graph->default_column_color = COLUMN_COLORS_MAX - 1; + graph->default_column_color = column_colors_max - 1; /* * Allocate a reasonably large default number of columns @@ -365,7 +369,7 @@ static struct commit_list *first_interesting_parent(struct git_graph *graph) static unsigned short graph_get_current_column_color(const struct git_graph *graph) { if (!DIFF_OPT_TST(&graph->revs->diffopt, COLOR_DIFF)) - return COLUMN_COLORS_MAX; + return column_colors_max; return graph->default_column_color; } @@ -375,7 +379,7 @@ static unsigned short graph_get_current_column_color(const struct git_graph *gra static void graph_increment_column_color(struct git_graph *graph) { graph->default_column_color = (graph->default_column_color + 1) % - COLUMN_COLORS_MAX; + column_colors_max; } static unsigned short graph_find_commit_color(const struct git_graph *graph, @@ -439,7 +443,7 @@ static void graph_update_width(struct git_graph *graph, max_cols++; /* - * We added a column for the the current commit as part of + * We added a column for the current commit as part of * graph->num_parents. If the current commit was already in * graph->columns, then we have double counted it. */ @@ -1143,7 +1147,7 @@ static void graph_output_collapsing_line(struct git_graph *graph, struct strbuf graph_update_state(graph, GRAPH_PADDING); } -static int graph_next_line(struct git_graph *graph, struct strbuf *sb) +int graph_next_line(struct git_graph *graph, struct strbuf *sb) { switch (graph->state) { case GRAPH_PADDING: @@ -5,6 +5,23 @@ struct git_graph; /* + * Set up a custom scheme for column colors. + * + * The default column color scheme inserts ANSI color escapes to colorize + * the graph. The various color escapes are stored in an array of strings + * where each entry corresponds to a color, except for the last entry, + * which denotes the escape for resetting the color back to the default. 
+ * When generating the graph, strings from this array are inserted before + * and after the various column characters. + * + * This function allows you to enable a custom array of color escapes. + * The 'colors_max' argument is the index of the last "reset" entry. + * + * This functions must be called BEFORE graph_init() is called. + */ +void graph_set_column_colors(const char **colors, unsigned short colors_max); + +/* * Create a new struct git_graph. */ struct git_graph *graph_init(struct rev_info *opt); @@ -32,6 +49,17 @@ void graph_update(struct git_graph *graph, struct commit *commit); */ int graph_is_commit_finished(struct git_graph const *graph); +/* + * Output the next line for a graph. + * This formats the next graph line into the specified strbuf. It is not + * terminated with a newline. + * + * Returns 1 if the line includes the current commit, and 0 otherwise. + * graph_next_line() will return 1 exactly once for each time + * graph_update() is called. + */ +int graph_next_line(struct git_graph *graph, struct strbuf *sb); + /* * graph_show_*: helper functions for printing to stdout @@ -41,6 +41,7 @@ static long curl_low_speed_time = -1; static int curl_ftp_no_epsv; static const char *curl_http_proxy; static char *user_name, *user_pass; +static const char *user_agent; #if LIBCURL_VERSION_NUM >= 0x071700 /* Use CURLOPT_KEYPASSWD as is */ @@ -196,6 +197,9 @@ static int http_options(const char *var, const char *value, void *cb) return 0; } + if (!strcmp("http.useragent", var)) + return git_config_string(&user_agent, var, value); + /* Fall back on the default ones */ return git_default_config(var, value, cb); } @@ -279,7 +283,8 @@ static CURL *get_curl_handle(void) if (getenv("GIT_CURL_VERBOSE")) curl_easy_setopt(result, CURLOPT_VERBOSE, 1); - curl_easy_setopt(result, CURLOPT_USERAGENT, GIT_USER_AGENT); + curl_easy_setopt(result, CURLOPT_USERAGENT, + user_agent ? 
user_agent : GIT_HTTP_USER_AGENT); if (curl_ftp_no_epsv) curl_easy_setopt(result, CURLOPT_FTP_USE_EPSV, 0); @@ -380,6 +385,8 @@ void http_init(struct remote *remote) #endif set_from_env(&ssl_cainfo, "GIT_SSL_CAINFO"); + set_from_env(&user_agent, "GIT_HTTP_USER_AGENT"); + low_speed_limit = getenv("GIT_HTTP_LOW_SPEED_LIMIT"); if (low_speed_limit != NULL) curl_low_speed_limit = strtol(low_speed_limit, NULL, 10); @@ -23,10 +23,10 @@ #endif #if LIBCURL_VERSION_NUM < 0x070704 -#define curl_global_cleanup() do { /* nothing */ } while(0) +#define curl_global_cleanup() do { /* nothing */ } while (0) #endif #if LIBCURL_VERSION_NUM < 0x070800 -#define curl_global_init(a) do { /* nothing */ } while(0) +#define curl_global_init(a) do { /* nothing */ } while (0) #endif #if (LIBCURL_VERSION_NUM < 0x070c04) || (LIBCURL_VERSION_NUM == 0x071000) diff --git a/imap-send.c b/imap-send.c index 1a577a0a09..71506a8dd3 100644 --- a/imap-send.c +++ b/imap-send.c @@ -543,9 +543,13 @@ static struct imap_cmd *v_issue_imap_cmd(struct imap_store *ctx, while (imap->literal_pending) get_cmd_result(ctx, NULL); - bufl = nfsnprintf(buf, sizeof(buf), cmd->cb.data ? CAP(LITERALPLUS) ? - "%d %s{%d+}\r\n" : "%d %s{%d}\r\n" : "%d %s\r\n", - cmd->tag, cmd->cmd, cmd->cb.dlen); + if (!cmd->cb.data) + bufl = nfsnprintf(buf, sizeof(buf), "%d %s\r\n", cmd->tag, cmd->cmd); + else + bufl = nfsnprintf(buf, sizeof(buf), "%d %s{%d%s}\r\n", + cmd->tag, cmd->cmd, cmd->cb.dlen, + CAP(LITERALPLUS) ? 
"+" : ""); + if (Verbose) { if (imap->num_in_progress) printf("(%d in progress) ", imap->num_in_progress); @@ -1086,7 +1090,7 @@ static struct store *imap_open_store(struct imap_server_conf *srvc) int gai; char portstr[6]; - snprintf(portstr, sizeof(portstr), "%hu", srvc->port); + snprintf(portstr, sizeof(portstr), "%d", srvc->port); memset(&hints, 0, sizeof(hints)); hints.ai_socktype = SOCK_STREAM; diff --git a/ll-merge.c b/ll-merge.c index 3764a1ab72..6bb3095c3a 100644 --- a/ll-merge.c +++ b/ll-merge.c @@ -46,7 +46,7 @@ static int ll_binary_merge(const struct ll_merge_driver *drv_unused, * or common ancestor for an internal merge. Still return * "conflicted merge" status. */ - mmfile_t *stolen = (flag & 01) ? orig : src1; + mmfile_t *stolen = (flag & LL_OPT_VIRTUAL_ANCESTOR) ? orig : src1; result->ptr = stolen->ptr; result->size = stolen->size; @@ -79,7 +79,7 @@ static int ll_xdl_merge(const struct ll_merge_driver *drv_unused, memset(&xmp, 0, sizeof(xmp)); xmp.level = XDL_MERGE_ZEALOUS; - xmp.favor= (flag >> 1) & 03; + xmp.favor = ll_opt_favor(flag); if (git_xmerge_style >= 0) xmp.style = git_xmerge_style; if (marker_size > 0) @@ -99,7 +99,8 @@ static int ll_union_merge(const struct ll_merge_driver *drv_unused, int flag, int marker_size) { /* Use union favor */ - flag = (flag & 1) | (XDL_MERGE_FAVOR_UNION << 1); + flag &= ~LL_OPT_FAVOR_MASK; + flag |= create_ll_flag(XDL_MERGE_FAVOR_UNION); return ll_xdl_merge(drv_unused, result, path_unused, orig, NULL, src1, NULL, src2, NULL, flag, marker_size); @@ -321,6 +322,16 @@ static int git_path_check_merge(const char *path, struct git_attr_check check[2] return git_checkattr(path, 2, check); } +static void normalize_file(mmfile_t *mm, const char *path) +{ + struct strbuf strbuf = STRBUF_INIT; + if (renormalize_buffer(path, mm->ptr, mm->size, &strbuf)) { + free(mm->ptr); + mm->size = strbuf.len; + mm->ptr = strbuf_detach(&strbuf, NULL); + } +} + int ll_merge(mmbuffer_t *result_buf, const char *path, mmfile_t *ancestor, 
const char *ancestor_label, @@ -332,8 +343,13 @@ int ll_merge(mmbuffer_t *result_buf, const char *ll_driver_name = NULL; int marker_size = DEFAULT_CONFLICT_MARKER_SIZE; const struct ll_merge_driver *driver; - int virtual_ancestor = flag & 01; + int virtual_ancestor = flag & LL_OPT_VIRTUAL_ANCESTOR; + if (flag & LL_OPT_RENORMALIZE) { + normalize_file(ancestor, path); + normalize_file(ours, path); + normalize_file(theirs, path); + } if (!git_path_check_merge(path, check)) { ll_driver_name = check[0].value; if (check[1].value) { diff --git a/ll-merge.h b/ll-merge.h index 57754cc8ca..ff7ca87bfa 100644 --- a/ll-merge.h +++ b/ll-merge.h @@ -5,6 +5,21 @@ #ifndef LL_MERGE_H #define LL_MERGE_H +#define LL_OPT_VIRTUAL_ANCESTOR (1 << 0) +#define LL_OPT_FAVOR_MASK ((1 << 1) | (1 << 2)) +#define LL_OPT_FAVOR_SHIFT 1 +#define LL_OPT_RENORMALIZE (1 << 3) + +static inline int ll_opt_favor(int flag) +{ + return (flag & LL_OPT_FAVOR_MASK) >> LL_OPT_FAVOR_SHIFT; +} + +static inline int create_ll_flag(int favor) +{ + return ((favor << LL_OPT_FAVOR_SHIFT) & LL_OPT_FAVOR_MASK); +} + int ll_merge(mmbuffer_t *result_buf, const char *path, mmfile_t *ancestor, const char *ancestor_label, diff --git a/merge-recursive.c b/merge-recursive.c index fb6aa4a551..20e1779428 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -20,6 +20,7 @@ #include "attr.h" #include "merge-recursive.h" #include "dir.h" +#include "submodule.h" static struct tree *shift_tree_object(struct tree *one, struct tree *two, const char *subtree_shift) @@ -136,16 +137,10 @@ static void output_commit_title(struct merge_options *o, struct commit *commit) if (parse_commit(commit) != 0) printf("(bad commit)\n"); else { - const char *s; - int len; - for (s = commit->buffer; *s; s++) - if (*s == '\n' && s[1] == '\n') { - s += 2; - break; - } - for (len = 0; s[len] && '\n' != s[len]; len++) - ; /* do nothing */ - printf("%.*s\n", len, s); + const char *title; + int len = find_commit_subject(commit->buffer, &title); + if (len) 
+ printf("%.*s\n", len, title); } } } @@ -185,7 +180,7 @@ static int git_merge_trees(int index_only, opts.fn = threeway_merge; opts.src_index = &the_index; opts.dst_index = &the_index; - opts.msgs = get_porcelain_error_msgs(); + setup_unpack_trees_porcelain(&opts, "merge"); init_tree_desc_from_tree(t+0, common); init_tree_desc_from_tree(t+1, head); @@ -525,13 +520,15 @@ static void update_file_flags(struct merge_options *o, void *buf; unsigned long size; - if (S_ISGITLINK(mode)) + if (S_ISGITLINK(mode)) { /* * We may later decide to recursively descend into * the submodule directory and update its index * and/or work tree, but we do not do that now. */ + update_wd = 0; goto update_index; + } buf = read_sha1_file(sha, &type, &size); if (!buf) @@ -647,7 +644,9 @@ static int merge_3way(struct merge_options *o, merge_status = ll_merge(result_buf, a->path, &orig, base_name, &src1, name1, &src2, name2, - (!!o->call_depth) | (favor << 1)); + ((o->call_depth ? LL_OPT_VIRTUAL_ANCESTOR : 0) | + (o->renormalize ? 
LL_OPT_RENORMALIZE : 0) | + create_ll_flag(favor))); free(name1); free(name2); @@ -716,8 +715,8 @@ static struct merge_file_info merge_file(struct merge_options *o, free(result_buf.ptr); result.clean = (merge_status == 0); } else if (S_ISGITLINK(a->mode)) { - result.clean = 0; - hashcpy(result.sha, a->sha1); + result.clean = merge_submodule(result.sha, one->path, one->sha1, + a->sha1, b->sha1); } else if (S_ISLNK(a->mode)) { hashcpy(result.sha, a->sha1); @@ -806,7 +805,8 @@ static int process_renames(struct merge_options *o, struct string_list *b_renames) { int clean_merge = 1, i, j; - struct string_list a_by_dst = {NULL, 0, 0, 0}, b_by_dst = {NULL, 0, 0, 0}; + struct string_list a_by_dst = STRING_LIST_INIT_NODUP; + struct string_list b_by_dst = STRING_LIST_INIT_NODUP; const struct rename *sre; for (i = 0; i < a_renames->nr; i++) { @@ -1019,14 +1019,22 @@ static int process_renames(struct merge_options *o, if (mfi.clean && sha_eq(mfi.sha, ren1->pair->two->sha1) && - mfi.mode == ren1->pair->two->mode) + mfi.mode == ren1->pair->two->mode) { /* - * This messaged is part of + * This message is part of * t6022 test. If you change * it update the test too. */ output(o, 3, "Skipped %s (merged same as existing)", ren1_dst); - else { + + /* There may be higher stage entries left + * in the index (e.g. due to a D/F + * conflict) that need to be resolved. + */ + if (!ren1->dst_entry->stages[2].mode != + !ren1->dst_entry->stages[3].mode) + ren1->dst_entry->processed = 0; + } else { if (mfi.merge || !mfi.clean) output(o, 1, "Renaming %s => %s", ren1_src, ren1_dst); if (mfi.merge) @@ -1056,6 +1064,53 @@ static unsigned char *stage_sha(const unsigned char *sha, unsigned mode) return (is_null_sha1(sha) || mode == 0) ? 
NULL: (unsigned char *)sha; } +static int read_sha1_strbuf(const unsigned char *sha1, struct strbuf *dst) +{ + void *buf; + enum object_type type; + unsigned long size; + buf = read_sha1_file(sha1, &type, &size); + if (!buf) + return error("cannot read object %s", sha1_to_hex(sha1)); + if (type != OBJ_BLOB) { + free(buf); + return error("object %s is not a blob", sha1_to_hex(sha1)); + } + strbuf_attach(dst, buf, size, size + 1); + return 0; +} + +static int blob_unchanged(const unsigned char *o_sha, + const unsigned char *a_sha, + int renormalize, const char *path) +{ + struct strbuf o = STRBUF_INIT; + struct strbuf a = STRBUF_INIT; + int ret = 0; /* assume changed for safety */ + + if (sha_eq(o_sha, a_sha)) + return 1; + if (!renormalize) + return 0; + + assert(o_sha && a_sha); + if (read_sha1_strbuf(o_sha, &o) || read_sha1_strbuf(a_sha, &a)) + goto error_return; + /* + * Note: binary | is used so that both renormalizations are + * performed. Comparison can be skipped if both files are + * unchanged since their sha1s have already been compared. 
+ */ + if (renormalize_buffer(path, o.buf, o.len, &o) | + renormalize_buffer(path, a.buf, o.len, &a)) + ret = (o.len == a.len && !memcmp(o.buf, a.buf, o.len)); + +error_return: + strbuf_release(&o); + strbuf_release(&a); + return ret; +} + /* Per entry merge function */ static int process_entry(struct merge_options *o, const char *path, struct stage_data *entry) @@ -1065,6 +1120,7 @@ static int process_entry(struct merge_options *o, print_index_entry("\tpath: ", entry); */ int clean_merge = 1; + int normalize = o->renormalize; unsigned o_mode = entry->stages[1].mode; unsigned a_mode = entry->stages[2].mode; unsigned b_mode = entry->stages[3].mode; @@ -1072,11 +1128,12 @@ static int process_entry(struct merge_options *o, unsigned char *a_sha = stage_sha(entry->stages[2].sha, a_mode); unsigned char *b_sha = stage_sha(entry->stages[3].sha, b_mode); + entry->processed = 1; if (o_sha && (!a_sha || !b_sha)) { /* Case A: Deleted in one */ if ((!a_sha && !b_sha) || - (sha_eq(a_sha, o_sha) && !b_sha) || - (!a_sha && sha_eq(b_sha, o_sha))) { + (!b_sha && blob_unchanged(o_sha, a_sha, normalize, path)) || + (!a_sha && blob_unchanged(o_sha, b_sha, normalize, path))) { /* Deleted in both or deleted in one and * unchanged in the other */ if (a_sha) @@ -1104,33 +1161,28 @@ static int process_entry(struct merge_options *o, } else if ((!o_sha && a_sha && !b_sha) || (!o_sha && !a_sha && b_sha)) { /* Case B: Added in one. 
*/ - const char *add_branch; - const char *other_branch; unsigned mode; const unsigned char *sha; - const char *conf; if (a_sha) { - add_branch = o->branch1; - other_branch = o->branch2; mode = a_mode; sha = a_sha; - conf = "file/directory"; } else { - add_branch = o->branch2; - other_branch = o->branch1; mode = b_mode; sha = b_sha; - conf = "directory/file"; } if (string_list_has_string(&o->current_directory_set, path)) { - const char *new_path = unique_path(o, path, add_branch); - clean_merge = 0; - output(o, 1, "CONFLICT (%s): There is a directory with name %s in %s. " - "Adding %s as %s", - conf, path, other_branch, path, new_path); - remove_file(o, 0, path, 0); - update_file(o, 0, sha, mode, new_path); + /* Handle D->F conflicts after all subfiles */ + entry->processed = 0; + /* But get any file out of the way now, so conflicted + * entries below the directory of the same name can + * be put in the working directory. + */ + if (a_sha) + output(o, 2, "Removing %s", path); + /* do not touch working file if it did not exist */ + remove_file(o, 0, path, !a_sha); + return 1; /* Assume clean till processed */ } else { output(o, 2, "Adding %s", path); update_file(o, 1, sha, mode, path); @@ -1178,26 +1230,62 @@ static int process_entry(struct merge_options *o, return clean_merge; } -struct unpack_trees_error_msgs get_porcelain_error_msgs(void) +/* + * Per entry merge function for D/F conflicts, to be called only after + * all files below dir have been processed. We do this because in the + * cases we can cleanly resolve D/F conflicts, process_entry() can clean + * out all the files below the directory for us. + */ +static int process_df_entry(struct merge_options *o, + const char *path, struct stage_data *entry) { - struct unpack_trees_error_msgs msgs = { - /* would_overwrite */ - "Your local changes to '%s' would be overwritten by merge. Aborting.", - /* not_uptodate_file */ - "Your local changes to '%s' would be overwritten by merge. 
Aborting.", - /* not_uptodate_dir */ - "Updating '%s' would lose untracked files in it. Aborting.", - /* would_lose_untracked */ - "Untracked working tree file '%s' would be %s by merge. Aborting", - /* bind_overlap -- will not happen here */ - NULL, - }; - if (advice_commit_before_merge) { - msgs.would_overwrite = msgs.not_uptodate_file = - "Your local changes to '%s' would be overwritten by merge. Aborting.\n" - "Please, commit your changes or stash them before you can merge."; + int clean_merge = 1; + unsigned o_mode = entry->stages[1].mode; + unsigned a_mode = entry->stages[2].mode; + unsigned b_mode = entry->stages[3].mode; + unsigned char *o_sha = stage_sha(entry->stages[1].sha, o_mode); + unsigned char *a_sha = stage_sha(entry->stages[2].sha, a_mode); + unsigned char *b_sha = stage_sha(entry->stages[3].sha, b_mode); + const char *add_branch; + const char *other_branch; + unsigned mode; + const unsigned char *sha; + const char *conf; + struct stat st; + + /* We currently only handle D->F cases */ + assert((!o_sha && a_sha && !b_sha) || + (!o_sha && !a_sha && b_sha)); + + entry->processed = 1; + + if (a_sha) { + add_branch = o->branch1; + other_branch = o->branch2; + mode = a_mode; + sha = a_sha; + conf = "file/directory"; + } else { + add_branch = o->branch2; + other_branch = o->branch1; + mode = b_mode; + sha = b_sha; + conf = "directory/file"; } - return msgs; + if (lstat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + const char *new_path = unique_path(o, path, add_branch); + clean_merge = 0; + output(o, 1, "CONFLICT (%s): There is a directory with name %s in %s. 
" + "Adding %s as %s", + conf, path, other_branch, path, new_path); + remove_file(o, 0, path, 0); + update_file(o, 0, sha, mode, new_path); + } else { + output(o, 2, "Adding %s", path); + update_file(o, 1, sha, mode, path); + } + + return clean_merge; } int merge_trees(struct merge_options *o, @@ -1249,6 +1337,13 @@ int merge_trees(struct merge_options *o, && !process_entry(o, path, e)) clean = 0; } + for (i = 0; i < entries->nr; i++) { + const char *path = entries->items[i].string; + struct stage_data *e = entries->items[i].util; + if (!e->processed + && !process_df_entry(o, path, e)) + clean = 0; + } string_list_clear(re_merge, 0); string_list_clear(re_head, 0); @@ -1436,6 +1531,7 @@ void init_merge_options(struct merge_options *o) o->buffer_output = 1; o->diff_rename_limit = -1; o->merge_rename_limit = -1; + o->renormalize = 0; git_config(merge_recursive_config, o); if (getenv("GIT_MERGE_VERBOSITY")) o->verbosity = diff --git a/merge-recursive.h b/merge-recursive.h index b831293b38..34492dbd6e 100644 --- a/merge-recursive.h +++ b/merge-recursive.h @@ -14,6 +14,7 @@ struct merge_options { } recursive_variant; const char *subtree_shift; unsigned buffer_output : 1; + unsigned renormalize : 1; int verbosity; int diff_rename_limit; int merge_rename_limit; @@ -23,9 +24,6 @@ struct merge_options { struct string_list current_directory_set; }; -/* Return a list of user-friendly error messages to be used by merge */ -struct unpack_trees_error_msgs get_porcelain_error_msgs(void); - /* merge_trees() but with recursive ancestor consolidation */ int merge_recursive(struct merge_options *o, struct commit *h1, @@ -877,14 +877,6 @@ void string_list_add_refs_from_colon_sep(struct string_list *list, strbuf_release(&globbuf); } -static int string_list_add_refs_from_list(struct string_list_item *item, - void *cb) -{ - struct string_list *list = cb; - string_list_add_refs_by_glob(list, item->string); - return 0; -} - static int notes_display_config(const char *k, const char *v, void 
*cb) { int *load_refs = cb; @@ -947,30 +939,18 @@ void init_notes(struct notes_tree *t, const char *notes_ref, load_subtree(t, &root_tree, t->root, 0); } -struct load_notes_cb_data { - int counter; - struct notes_tree **trees; -}; - -static int load_one_display_note_ref(struct string_list_item *item, - void *cb_data) -{ - struct load_notes_cb_data *c = cb_data; - struct notes_tree *t = xcalloc(1, sizeof(struct notes_tree)); - init_notes(t, item->string, combine_notes_ignore, 0); - c->trees[c->counter++] = t; - return 0; -} - struct notes_tree **load_notes_trees(struct string_list *refs) { + struct string_list_item *item; + int counter = 0; struct notes_tree **trees; - struct load_notes_cb_data cb_data; trees = xmalloc((refs->nr+1) * sizeof(struct notes_tree *)); - cb_data.counter = 0; - cb_data.trees = trees; - for_each_string_list(refs, load_one_display_note_ref, &cb_data); - trees[cb_data.counter] = NULL; + for_each_string_list_item(item, refs) { + struct notes_tree *t = xcalloc(1, sizeof(struct notes_tree)); + init_notes(t, item->string, combine_notes_ignore, 0); + trees[counter++] = t; + } + trees[counter] = NULL; return trees; } @@ -995,10 +975,12 @@ void init_display_notes(struct display_notes_opt *opt) git_config(notes_display_config, &load_config_refs); - if (opt && opt->extra_notes_refs) - for_each_string_list(opt->extra_notes_refs, - string_list_add_refs_from_list, - &display_notes_refs); + if (opt && opt->extra_notes_refs) { + struct string_list_item *item; + for_each_string_list_item(item, opt->extra_notes_refs) + string_list_add_refs_by_glob(&display_notes_refs, + item->string); + } display_notes_trees = load_notes_trees(&display_notes_refs); string_list_clear(&display_notes_refs, 0); @@ -199,7 +199,7 @@ struct object *parse_object(const unsigned char *sha1) return NULL; } - obj = parse_object_buffer(repl, type, size, buffer, &eaten); + obj = parse_object_buffer(sha1, type, size, buffer, &eaten); if (!eaten) free(buffer); return obj; @@ -21,6 +21,8 @@ 
struct object_array { } *objects; }; +#define OBJECT_ARRAY_INIT { 0, 0, NULL } + #define TYPE_BITS 3 #define FLAG_BITS 27 diff --git a/pack-check.c b/pack-check.c index 395fb9527a..9d0cb9a114 100644 --- a/pack-check.c +++ b/pack-check.c @@ -77,7 +77,7 @@ static int verify_packfile(struct packed_git *p, err = error("%s SHA1 checksum mismatch", p->pack_name); if (hashcmp(index_base + index_size - 40, pack_sig)) - err = error("%s SHA1 does not match its inddex", + err = error("%s SHA1 does not match its index", p->pack_name); unuse_pack(w_curs); diff --git a/pack-refs.c b/pack-refs.c index 7f43f8ac33..1290570260 100644 --- a/pack-refs.c +++ b/pack-refs.c @@ -60,6 +60,37 @@ static int handle_one_ref(const char *path, const unsigned char *sha1, return 0; } +/* + * Remove empty parents, but spare refs/ and immediate subdirs. + * Note: munges *name. + */ +static void try_remove_empty_parents(char *name) +{ + char *p, *q; + int i; + p = name; + for (i = 0; i < 2; i++) { /* refs/{heads,tags,...}/ */ + while (*p && *p != '/') + p++; + /* tolerate duplicate slashes; see check_ref_format() */ + while (*p == '/') + p++; + } + for (q = p; *q; q++) + ; + while (1) { + while (q > p && *q != '/') + q--; + while (q > p && *(q-1) == '/') + q--; + if (q == p) + break; + *q = '\0'; + if (rmdir(git_path("%s", name))) + break; + } +} + /* make sure nobody touched the ref, and unlink */ static void prune_ref(struct ref_to_prune *r) { @@ -68,6 +99,7 @@ static void prune_ref(struct ref_to_prune *r) if (lock) { unlink_or_warn(git_path("%s", r->name)); unlock_ref(lock); + try_remove_empty_parents(r->name); } } diff --git a/parse-options.h b/parse-options.h index 7435cdbf1d..d982f0f1bf 100644 --- a/parse-options.h +++ b/parse-options.h @@ -69,7 +69,7 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset); * `flags`:: * mask of parse_opt_option_flags. 
* PARSE_OPT_OPTARG: says that the argument is optional (not for BOOLEANs) - * PARSE_OPT_NOARG: says that this option takes no argument + * PARSE_OPT_NOARG: says that this option does not take an argument * PARSE_OPT_NONEG: says that this option cannot be negated * PARSE_OPT_HIDDEN: this option is skipped in the default usage, and * shown only in the full usage. @@ -122,6 +122,44 @@ char *git_path(const char *fmt, ...) return cleanup_path(pathname); } +char *git_path_submodule(const char *path, const char *fmt, ...) +{ + char *pathname = get_pathname(); + struct strbuf buf = STRBUF_INIT; + const char *git_dir; + va_list args; + unsigned len; + + len = strlen(path); + if (len > PATH_MAX-100) + return bad_path; + + strbuf_addstr(&buf, path); + if (len && path[len-1] != '/') + strbuf_addch(&buf, '/'); + strbuf_addstr(&buf, ".git"); + + git_dir = read_gitfile_gently(buf.buf); + if (git_dir) { + strbuf_reset(&buf); + strbuf_addstr(&buf, git_dir); + } + strbuf_addch(&buf, '/'); + + if (buf.len >= PATH_MAX) + return bad_path; + memcpy(pathname, buf.buf, buf.len + 1); + + strbuf_release(&buf); + len = strlen(pathname); + + va_start(args, fmt); + len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args); + va_end(args); + if (len >= PATH_MAX) + return bad_path; + return cleanup_path(pathname); +} /* git_mkstemp() - create tmp file honoring TMPDIR variable */ int git_mkstemp(char *path, size_t len, const char *template) @@ -316,6 +354,8 @@ char *expand_user_path(const char *path) size_t username_len = first_slash - username; if (username_len == 0) { const char *home = getenv("HOME"); + if (!home) + goto return_null; strbuf_add(&user_path, home, strlen(home)); } else { struct passwd *pw = getpw_str(username, username_len); diff --git a/perl/Makefile b/perl/Makefile index 4ab21d61b8..a2ffb6402d 100644 --- a/perl/Makefile +++ b/perl/Makefile @@ -38,7 +38,7 @@ $(makfile): ../GIT-CFLAGS Makefile echo ' echo $(instdir_SQ)' >> $@ else $(makfile): Makefile.PL ../GIT-CFLAGS - 
$(PERL_PATH) $< PREFIX='$(prefix_SQ)' + $(PERL_PATH) $< PREFIX='$(prefix_SQ)' INSTALL_BASE='' endif # this is just added comfort for calling make directly in perl dir diff --git a/reachable.c b/reachable.c index b515fa2de3..a03fabf060 100644 --- a/reachable.c +++ b/reachable.c @@ -90,7 +90,7 @@ static void walk_commit_list(struct rev_info *revs) { int i; struct commit *commit; - struct object_array objects = { 0, 0, NULL }; + struct object_array objects = OBJECT_ARRAY_INIT; /* Walk all commits, process their trees */ while ((commit = get_revision(revs)) != NULL) diff --git a/read-cache.c b/read-cache.c index f1f789b7b8..1f42473e80 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1516,6 +1516,7 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce) int size = ondisk_ce_size(ce); struct ondisk_cache_entry *ondisk = xcalloc(1, size); char *name; + int result; ondisk->ctime.sec = htonl(ce->ce_ctime.sec); ondisk->mtime.sec = htonl(ce->ce_mtime.sec); @@ -1539,7 +1540,9 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce) name = ondisk->name; memcpy(name, ce->name, ce_namelen(ce)); - return ce_write(c, fd, ondisk, size); + result = ce_write(c, fd, ondisk, size); + free(ondisk); + return result; } int write_index(struct index_state *istate, int newfd) @@ -157,7 +157,7 @@ static struct cached_refs { char did_packed; struct ref_list *loose; struct ref_list *packed; -} cached_refs; +} cached_refs, submodule_refs; static struct ref_list *current_ref; static struct ref_list *extra_refs; @@ -229,23 +229,45 @@ void clear_extra_refs(void) extra_refs = NULL; } -static struct ref_list *get_packed_refs(void) +static struct ref_list *get_packed_refs(const char *submodule) { - if (!cached_refs.did_packed) { - FILE *f = fopen(git_path("packed-refs"), "r"); - cached_refs.packed = NULL; + const char *packed_refs_file; + struct cached_refs *refs; + + if (submodule) { + packed_refs_file = git_path_submodule(submodule, "packed-refs"); + refs = 
&submodule_refs; + free_ref_list(refs->packed); + } else { + packed_refs_file = git_path("packed-refs"); + refs = &cached_refs; + } + + if (!refs->did_packed || submodule) { + FILE *f = fopen(packed_refs_file, "r"); + refs->packed = NULL; if (f) { - read_packed_refs(f, &cached_refs); + read_packed_refs(f, refs); fclose(f); } - cached_refs.did_packed = 1; + refs->did_packed = 1; } - return cached_refs.packed; + return refs->packed; } -static struct ref_list *get_ref_dir(const char *base, struct ref_list *list) +static struct ref_list *get_ref_dir(const char *submodule, const char *base, + struct ref_list *list) { - DIR *dir = opendir(git_path("%s", base)); + DIR *dir; + const char *path; + + if (submodule) + path = git_path_submodule(submodule, "%s", base); + else + path = git_path("%s", base); + + + dir = opendir(path); if (dir) { struct dirent *de; @@ -261,6 +283,7 @@ static struct ref_list *get_ref_dir(const char *base, struct ref_list *list) struct stat st; int flag; int namelen; + const char *refdir; if (de->d_name[0] == '.') continue; @@ -270,16 +293,27 @@ static struct ref_list *get_ref_dir(const char *base, struct ref_list *list) if (has_extension(de->d_name, ".lock")) continue; memcpy(ref + baselen, de->d_name, namelen+1); - if (stat(git_path("%s", ref), &st) < 0) + refdir = submodule + ? 
git_path_submodule(submodule, "%s", ref) + : git_path("%s", ref); + if (stat(refdir, &st) < 0) continue; if (S_ISDIR(st.st_mode)) { - list = get_ref_dir(ref, list); + list = get_ref_dir(submodule, ref, list); continue; } - if (!resolve_ref(ref, sha1, 1, &flag)) { + if (submodule) { hashclr(sha1); - flag |= REF_BROKEN; - } + flag = 0; + if (resolve_gitlink_ref(submodule, ref, sha1) < 0) { + hashclr(sha1); + flag |= REF_BROKEN; + } + } else + if (!resolve_ref(ref, sha1, 1, &flag)) { + hashclr(sha1); + flag |= REF_BROKEN; + } list = add_ref(ref, sha1, flag, list, NULL); } free(ref); @@ -322,10 +356,16 @@ void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname) for_each_rawref(warn_if_dangling_symref, &data); } -static struct ref_list *get_loose_refs(void) +static struct ref_list *get_loose_refs(const char *submodule) { + if (submodule) { + free_ref_list(submodule_refs.loose); + submodule_refs.loose = get_ref_dir(submodule, "refs", NULL); + return submodule_refs.loose; + } + if (!cached_refs.did_loose) { - cached_refs.loose = get_ref_dir("refs", NULL); + cached_refs.loose = get_ref_dir(NULL, "refs", NULL); cached_refs.did_loose = 1; } return cached_refs.loose; @@ -459,7 +499,7 @@ const char *resolve_ref(const char *ref, unsigned char *sha1, int reading, int * git_snpath(path, sizeof(path), "%s", ref); /* Special case: non-existing file. 
*/ if (lstat(path, &st) < 0) { - struct ref_list *list = get_packed_refs(); + struct ref_list *list = get_packed_refs(NULL); while (list) { if (!strcmp(ref, list->name)) { hashcpy(sha1, list->sha1); @@ -588,7 +628,7 @@ int peel_ref(const char *ref, unsigned char *sha1) return -1; if ((flag & REF_ISPACKED)) { - struct ref_list *list = get_packed_refs(); + struct ref_list *list = get_packed_refs(NULL); while (list) { if (!strcmp(list->name, ref)) { @@ -615,12 +655,12 @@ fallback: return -1; } -static int do_for_each_ref(const char *base, each_ref_fn fn, int trim, - int flags, void *cb_data) +static int do_for_each_ref(const char *submodule, const char *base, each_ref_fn fn, + int trim, int flags, void *cb_data) { int retval = 0; - struct ref_list *packed = get_packed_refs(); - struct ref_list *loose = get_loose_refs(); + struct ref_list *packed = get_packed_refs(submodule); + struct ref_list *loose = get_loose_refs(submodule); struct ref_list *extra; @@ -657,24 +697,54 @@ end_each: return retval; } -int head_ref(each_ref_fn fn, void *cb_data) + +static int do_head_ref(const char *submodule, each_ref_fn fn, void *cb_data) { unsigned char sha1[20]; int flag; + if (submodule) { + if (resolve_gitlink_ref(submodule, "HEAD", sha1) == 0) + return fn("HEAD", sha1, 0, cb_data); + + return 0; + } + if (resolve_ref("HEAD", sha1, 1, &flag)) return fn("HEAD", sha1, flag, cb_data); + return 0; } +int head_ref(each_ref_fn fn, void *cb_data) +{ + return do_head_ref(NULL, fn, cb_data); +} + +int head_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return do_head_ref(submodule, fn, cb_data); +} + int for_each_ref(each_ref_fn fn, void *cb_data) { - return do_for_each_ref("refs/", fn, 0, 0, cb_data); + return do_for_each_ref(NULL, "refs/", fn, 0, 0, cb_data); +} + +int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(submodule, "refs/", fn, 0, 0, cb_data); } int for_each_ref_in(const char *prefix, 
each_ref_fn fn, void *cb_data) { - return do_for_each_ref(prefix, fn, strlen(prefix), 0, cb_data); + return do_for_each_ref(NULL, prefix, fn, strlen(prefix), 0, cb_data); +} + +int for_each_ref_in_submodule(const char *submodule, const char *prefix, + each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(submodule, prefix, fn, strlen(prefix), 0, cb_data); } int for_each_tag_ref(each_ref_fn fn, void *cb_data) @@ -682,19 +752,34 @@ int for_each_tag_ref(each_ref_fn fn, void *cb_data) return for_each_ref_in("refs/tags/", fn, cb_data); } +int for_each_tag_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return for_each_ref_in_submodule(submodule, "refs/tags/", fn, cb_data); +} + int for_each_branch_ref(each_ref_fn fn, void *cb_data) { return for_each_ref_in("refs/heads/", fn, cb_data); } +int for_each_branch_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return for_each_ref_in_submodule(submodule, "refs/heads/", fn, cb_data); +} + int for_each_remote_ref(each_ref_fn fn, void *cb_data) { return for_each_ref_in("refs/remotes/", fn, cb_data); } +int for_each_remote_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return for_each_ref_in_submodule(submodule, "refs/remotes/", fn, cb_data); +} + int for_each_replace_ref(each_ref_fn fn, void *cb_data) { - return do_for_each_ref("refs/replace/", fn, 13, 0, cb_data); + return do_for_each_ref(NULL, "refs/replace/", fn, 13, 0, cb_data); } int for_each_glob_ref_in(each_ref_fn fn, const char *pattern, @@ -734,7 +819,7 @@ int for_each_glob_ref(each_ref_fn fn, const char *pattern, void *cb_data) int for_each_rawref(each_ref_fn fn, void *cb_data) { - return do_for_each_ref("refs/", fn, 0, + return do_for_each_ref(NULL, "refs/", fn, 0, DO_FOR_EACH_INCLUDE_BROKEN, cb_data); } @@ -958,7 +1043,7 @@ static struct ref_lock *lock_ref_sha1_basic(const char *ref, const unsigned char * name is a proper prefix of our refname. 
*/ if (missing && - !is_refname_available(ref, NULL, get_packed_refs(), 0)) { + !is_refname_available(ref, NULL, get_packed_refs(NULL), 0)) { last_errno = ENOTDIR; goto error_return; } @@ -1021,7 +1106,7 @@ static int repack_without_ref(const char *refname) int fd; int found = 0; - packed_ref_list = get_packed_refs(); + packed_ref_list = get_packed_refs(NULL); for (list = packed_ref_list; list; list = list->next) { if (!strcmp(refname, list->name)) { found = 1; @@ -1119,10 +1204,10 @@ int rename_ref(const char *oldref, const char *newref, const char *logmsg) if (!symref) return error("refname %s not found", oldref); - if (!is_refname_available(newref, oldref, get_packed_refs(), 0)) + if (!is_refname_available(newref, oldref, get_packed_refs(NULL), 0)) return 1; - if (!is_refname_available(newref, oldref, get_loose_refs(), 0)) + if (!is_refname_available(newref, oldref, get_loose_refs(NULL), 0)) return 1; lock = lock_ref_sha1_basic(renamed_ref, NULL, 0, NULL); @@ -28,6 +28,14 @@ extern int for_each_replace_ref(each_ref_fn, void *); extern int for_each_glob_ref(each_ref_fn, const char *pattern, void *); extern int for_each_glob_ref_in(each_ref_fn, const char *pattern, const char* prefix, void *); +extern int head_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); +extern int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); +extern int for_each_ref_in_submodule(const char *submodule, const char *prefix, + each_ref_fn fn, void *cb_data); +extern int for_each_tag_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); +extern int for_each_branch_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); +extern int for_each_remote_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); + static inline const char *has_glob_specials(const char *pattern) { return strpbrk(pattern, "?*["); diff --git a/remote-curl.c b/remote-curl.c index 24fbb9a9b9..04d4813e41 100644 --- a/remote-curl.c +++ 
b/remote-curl.c @@ -528,11 +528,12 @@ static int rpc_service(struct rpc_state *rpc, struct discovery *heads) rpc->len = n; err |= post_rpc(rpc); } - strbuf_read(&rpc->result, client.out, 0); close(client.in); - close(client.out); client.in = -1; + strbuf_read(&rpc->result, client.out, 0); + + close(client.out); client.out = -1; err |= finish_command(&client); @@ -754,7 +754,7 @@ int for_each_remote(each_remote_fn fn, void *priv) void ref_remove_duplicates(struct ref *ref_map) { - struct string_list refs = { NULL, 0, 0, 0 }; + struct string_list refs = STRING_LIST_INIT_NODUP; struct string_list_item *item = NULL; struct ref *prev = NULL, *next = NULL; for (; ref_map; prev = ref_map, ref_map = next) { @@ -1704,7 +1704,7 @@ static int get_stale_heads_cb(const char *refname, struct ref *get_stale_heads(struct remote *remote, struct ref *fetch_map) { struct ref *ref, *stale_refs = NULL; - struct string_list ref_names = { NULL, 0, 0, 0 }; + struct string_list ref_names = STRING_LIST_INIT_NODUP; struct stale_heads_info info; info.remote = remote; info.ref_names = &ref_names; @@ -319,6 +319,10 @@ static int handle_cache(const char *path, unsigned char *sha1, const char *outpu if (!mmfile[i].ptr && !mmfile[i].size) mmfile[i].ptr = xstrdup(""); } + /* + * NEEDSWORK: handle conflicts from merges with + * merge.renormalize set, too + */ ll_merge(&result, path, &mmfile[0], NULL, &mmfile[1], "ours", &mmfile[2], "theirs", 0); @@ -378,7 +382,13 @@ static int merge(const char *name, const char *path) } ret = ll_merge(&result, path, &base, NULL, &cur, "", &other, "", 0); if (!ret) { - FILE *f = fopen(path, "w"); + FILE *f; + + if (utime(rerere_path(name, "postimage"), NULL) < 0) + warning("failed utime() on %s: %s", + rerere_path(name, "postimage"), + strerror(errno)); + f = fopen(path, "w"); if (!f) return error("Could not open %s: %s", path, strerror(errno)); @@ -426,8 +436,8 @@ static int update_paths(struct string_list *update) static int do_plain_rerere(struct string_list *rr, 
int fd) { - struct string_list conflict = { NULL, 0, 0, 1 }; - struct string_list update = { NULL, 0, 0, 1 }; + struct string_list conflict = STRING_LIST_INIT_DUP; + struct string_list update = STRING_LIST_INIT_DUP; int i; find_conflict(&conflict); @@ -547,7 +557,7 @@ int setup_rerere(struct string_list *merge_rr, int flags) int rerere(int flags) { - struct string_list merge_rr = { NULL, 0, 0, 1 }; + struct string_list merge_rr = STRING_LIST_INIT_DUP; int fd; fd = setup_rerere(&merge_rr, flags); @@ -585,8 +595,8 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr) int rerere_forget(const char **pathspec) { int i, fd; - struct string_list conflict = { NULL, 0, 0, 1 }; - struct string_list merge_rr = { NULL, 0, 0, 1 }; + struct string_list conflict = STRING_LIST_INIT_DUP; + struct string_list merge_rr = STRING_LIST_INIT_DUP; if (read_cache() < 0) return error("Could not read index"); diff --git a/resolve-undo.c b/resolve-undo.c index 174ebec9e5..72b46125b7 100644 --- a/resolve-undo.c +++ b/resolve-undo.c @@ -28,29 +28,25 @@ void record_resolve_undo(struct index_state *istate, struct cache_entry *ce) ui->mode[stage - 1] = ce->ce_mode; } -static int write_one(struct string_list_item *item, void *cbdata) +void resolve_undo_write(struct strbuf *sb, struct string_list *resolve_undo) { - struct strbuf *sb = cbdata; - struct resolve_undo_info *ui = item->util; - int i; + struct string_list_item *item; + for_each_string_list_item(item, resolve_undo) { + struct resolve_undo_info *ui = item->util; + int i; - if (!ui) - return 0; - strbuf_addstr(sb, item->string); - strbuf_addch(sb, 0); - for (i = 0; i < 3; i++) - strbuf_addf(sb, "%o%c", ui->mode[i], 0); - for (i = 0; i < 3; i++) { - if (!ui->mode[i]) + if (!ui) continue; - strbuf_add(sb, ui->sha1[i], 20); + strbuf_addstr(sb, item->string); + strbuf_addch(sb, 0); + for (i = 0; i < 3; i++) + strbuf_addf(sb, "%o%c", ui->mode[i], 0); + for (i = 0; i < 3; i++) { + if (!ui->mode[i]) + continue; + 
strbuf_add(sb, ui->sha1[i], 20); + } } - return 0; -} - -void resolve_undo_write(struct strbuf *sb, struct string_list *resolve_undo) -{ - for_each_string_list(resolve_undo, write_one, sb); } struct string_list *resolve_undo_read(const char *data, unsigned long size) diff --git a/revision.c b/revision.c index 7e82efd932..b1c18906ba 100644 --- a/revision.c +++ b/revision.c @@ -820,12 +820,12 @@ static void init_all_refs_cb(struct all_refs_cb *cb, struct rev_info *revs, cb->all_flags = flags; } -static void handle_refs(struct rev_info *revs, unsigned flags, - int (*for_each)(each_ref_fn, void *)) +static void handle_refs(const char *submodule, struct rev_info *revs, unsigned flags, + int (*for_each)(const char *, each_ref_fn, void *)) { struct all_refs_cb cb; init_all_refs_cb(&cb, revs, flags); - for_each(handle_one_ref, &cb); + for_each(submodule, handle_one_ref, &cb); } static void handle_one_reflog_commit(unsigned char *sha1, void *cb_data) @@ -1148,6 +1148,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg int *unkc, const char **unkv) { const char *arg = argv[0]; + const char *optarg; + int argcount; /* pseudo revision arguments */ if (!strcmp(arg, "--all") || !strcmp(arg, "--branches") || @@ -1160,11 +1162,13 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg return 1; } - if (!prefixcmp(arg, "--max-count=")) { - revs->max_count = atoi(arg + 12); + if ((argcount = parse_long_opt("max-count", argv, &optarg))) { + revs->max_count = atoi(optarg); revs->no_walk = 0; - } else if (!prefixcmp(arg, "--skip=")) { - revs->skip_count = atoi(arg + 7); + return argcount; + } else if ((argcount = parse_long_opt("skip", argv, &optarg))) { + revs->skip_count = atoi(optarg); + return argcount; } else if ((*arg == '-') && isdigit(arg[1])) { /* accept -<digit>, like traditional "head" */ revs->max_count = atoi(arg + 1); @@ -1178,18 +1182,24 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const 
char **arg } else if (!prefixcmp(arg, "-n")) { revs->max_count = atoi(arg + 2); revs->no_walk = 0; - } else if (!prefixcmp(arg, "--max-age=")) { - revs->max_age = atoi(arg + 10); - } else if (!prefixcmp(arg, "--since=")) { - revs->max_age = approxidate(arg + 8); - } else if (!prefixcmp(arg, "--after=")) { - revs->max_age = approxidate(arg + 8); - } else if (!prefixcmp(arg, "--min-age=")) { - revs->min_age = atoi(arg + 10); - } else if (!prefixcmp(arg, "--before=")) { - revs->min_age = approxidate(arg + 9); - } else if (!prefixcmp(arg, "--until=")) { - revs->min_age = approxidate(arg + 8); + } else if ((argcount = parse_long_opt("max-age", argv, &optarg))) { + revs->max_age = atoi(optarg); + return argcount; + } else if ((argcount = parse_long_opt("since", argv, &optarg))) { + revs->max_age = approxidate(optarg); + return argcount; + } else if ((argcount = parse_long_opt("after", argv, &optarg))) { + revs->max_age = approxidate(optarg); + return argcount; + } else if ((argcount = parse_long_opt("min-age", argv, &optarg))) { + revs->min_age = atoi(optarg); + return argcount; + } else if ((argcount = parse_long_opt("before", argv, &optarg))) { + revs->min_age = approxidate(optarg); + return argcount; + } else if ((argcount = parse_long_opt("until", argv, &optarg))) { + revs->min_age = approxidate(optarg); + return argcount; } else if (!strcmp(arg, "--first-parent")) { revs->first_parent_only = 1; } else if (!strcmp(arg, "--ancestry-path")) { @@ -1295,6 +1305,10 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->pretty_given = 1; get_commit_format(arg+8, revs); } else if (!prefixcmp(arg, "--pretty=") || !prefixcmp(arg, "--format=")) { + /* + * Detached form ("--pretty X" as opposed to "--pretty=X") + * not allowed, since the argument is optional. 
+ */ revs->verbose_header = 1; revs->pretty_given = 1; get_commit_format(arg+9, revs); @@ -1359,21 +1373,25 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg } else if (!strcmp(arg, "--relative-date")) { revs->date_mode = DATE_RELATIVE; revs->date_mode_explicit = 1; - } else if (!strncmp(arg, "--date=", 7)) { - revs->date_mode = parse_date_format(arg + 7); + } else if ((argcount = parse_long_opt("date", argv, &optarg))) { + revs->date_mode = parse_date_format(optarg); revs->date_mode_explicit = 1; + return argcount; } else if (!strcmp(arg, "--log-size")) { revs->show_log_size = 1; } /* * Grepping the commit log */ - else if (!prefixcmp(arg, "--author=")) { - add_header_grep(revs, GREP_HEADER_AUTHOR, arg+9); - } else if (!prefixcmp(arg, "--committer=")) { - add_header_grep(revs, GREP_HEADER_COMMITTER, arg+12); - } else if (!prefixcmp(arg, "--grep=")) { - add_message_grep(revs, arg+7); + else if ((argcount = parse_long_opt("author", argv, &optarg))) { + add_header_grep(revs, GREP_HEADER_AUTHOR, optarg); + return argcount; + } else if ((argcount = parse_long_opt("committer", argv, &optarg))) { + add_header_grep(revs, GREP_HEADER_COMMITTER, optarg); + return argcount; + } else if ((argcount = parse_long_opt("grep", argv, &optarg))) { + add_message_grep(revs, optarg); + return argcount; } else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) { revs->grep_filter.regflags |= REG_EXTENDED; } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) { @@ -1382,12 +1400,12 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->grep_filter.fixed = 1; } else if (!strcmp(arg, "--all-match")) { revs->grep_filter.all_match = 1; - } else if (!prefixcmp(arg, "--encoding=")) { - arg += 11; - if (strcmp(arg, "none")) - git_log_output_encoding = xstrdup(arg); + } else if ((argcount = parse_long_opt("encoding", argv, &optarg))) { + if (strcmp(optarg, "none")) + git_log_output_encoding = 
xstrdup(optarg); else git_log_output_encoding = ""; + return argcount; } else if (!strcmp(arg, "--reverse")) { revs->reverse ^= 1; } else if (!strcmp(arg, "--children")) { @@ -1417,14 +1435,14 @@ void parse_revision_opt(struct rev_info *revs, struct parse_opt_ctx_t *ctx, ctx->argc -= n; } -static int for_each_bad_bisect_ref(each_ref_fn fn, void *cb_data) +static int for_each_bad_bisect_ref(const char *submodule, each_ref_fn fn, void *cb_data) { - return for_each_ref_in("refs/bisect/bad", fn, cb_data); + return for_each_ref_in_submodule(submodule, "refs/bisect/bad", fn, cb_data); } -static int for_each_good_bisect_ref(each_ref_fn fn, void *cb_data) +static int for_each_good_bisect_ref(const char *submodule, each_ref_fn fn, void *cb_data) { - return for_each_ref_in("refs/bisect/good", fn, cb_data); + return for_each_ref_in_submodule(submodule, "refs/bisect/good", fn, cb_data); } static void append_prune_data(const char ***prune_data, const char **av) @@ -1466,6 +1484,12 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s { int i, flags, left, seen_dashdash, read_from_stdin, got_rev_arg = 0; const char **prune_data = NULL; + const char *submodule = NULL; + const char *optarg; + int argcount; + + if (opt) + submodule = opt->submodule; /* First, search for "--" */ seen_dashdash = 0; @@ -1490,32 +1514,33 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s int opts; if (!strcmp(arg, "--all")) { - handle_refs(revs, flags, for_each_ref); - handle_refs(revs, flags, head_ref); + handle_refs(submodule, revs, flags, for_each_ref_submodule); + handle_refs(submodule, revs, flags, head_ref_submodule); continue; } if (!strcmp(arg, "--branches")) { - handle_refs(revs, flags, for_each_branch_ref); + handle_refs(submodule, revs, flags, for_each_branch_ref_submodule); continue; } if (!strcmp(arg, "--bisect")) { - handle_refs(revs, flags, for_each_bad_bisect_ref); - handle_refs(revs, flags ^ UNINTERESTING, 
for_each_good_bisect_ref); + handle_refs(submodule, revs, flags, for_each_bad_bisect_ref); + handle_refs(submodule, revs, flags ^ UNINTERESTING, for_each_good_bisect_ref); revs->bisect = 1; continue; } if (!strcmp(arg, "--tags")) { - handle_refs(revs, flags, for_each_tag_ref); + handle_refs(submodule, revs, flags, for_each_tag_ref_submodule); continue; } if (!strcmp(arg, "--remotes")) { - handle_refs(revs, flags, for_each_remote_ref); + handle_refs(submodule, revs, flags, for_each_remote_ref_submodule); continue; } - if (!prefixcmp(arg, "--glob=")) { + if ((argcount = parse_long_opt("glob", argv + i, &optarg))) { struct all_refs_cb cb; + i += argcount - 1; init_all_refs_cb(&cb, revs, flags); - for_each_glob_ref(handle_one_ref, arg + 7, &cb); + for_each_glob_ref(handle_one_ref, optarg, &cb); continue; } if (!prefixcmp(arg, "--branches=")) { diff --git a/revision.h b/revision.h index 36fdf22b29..05659c64ac 100644 --- a/revision.h +++ b/revision.h @@ -151,6 +151,7 @@ extern volatile show_early_output_fn_t show_early_output; struct setup_revision_opt { const char *def; void (*tweak)(struct rev_info *, struct setup_revision_opt *); + const char *submodule; }; extern void init_revisions(struct rev_info *revs, const char *prefix); @@ -313,21 +313,129 @@ const char *read_gitfile_gently(const char *path) return path; } +static const char *setup_explicit_git_dir(const char *gitdirenv, + const char *work_tree_env, int *nongit_ok) +{ + static char buffer[1024 + 1]; + const char *retval; + + if (PATH_MAX - 40 < strlen(gitdirenv)) + die("'$%s' too big", GIT_DIR_ENVIRONMENT); + if (!is_git_directory(gitdirenv)) { + if (nongit_ok) { + *nongit_ok = 1; + return NULL; + } + die("Not a git repository: '%s'", gitdirenv); + } + if (!work_tree_env) { + retval = set_work_tree(gitdirenv); + /* config may override worktree */ + if (check_repository_format_gently(nongit_ok)) + return NULL; + return retval; + } + if (check_repository_format_gently(nongit_ok)) + return NULL; + retval = 
get_relative_cwd(buffer, sizeof(buffer) - 1, + get_git_work_tree()); + if (!retval || !*retval) + return NULL; + set_git_dir(make_absolute_path(gitdirenv)); + if (chdir(work_tree_env) < 0) + die_errno ("Could not chdir to '%s'", work_tree_env); + strcat(buffer, "/"); + return retval; +} + +static int cwd_contains_git_dir(const char **gitfile_dirp) +{ + const char *gitfile_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT); + *gitfile_dirp = gitfile_dir; + if (gitfile_dir) { + if (set_git_dir(gitfile_dir)) + die("Repository setup failed"); + return 1; + } + return is_git_directory(DEFAULT_GIT_DIR_ENVIRONMENT); +} + +static const char *setup_discovered_git_dir(const char *work_tree_env, + int offset, int len, char *cwd, int *nongit_ok) +{ + int root_len; + + inside_git_dir = 0; + if (!work_tree_env) + inside_work_tree = 1; + root_len = offset_1st_component(cwd); + git_work_tree_cfg = xstrndup(cwd, offset > root_len ? offset : root_len); + if (check_repository_format_gently(nongit_ok)) + return NULL; + if (offset == len) + return NULL; + + /* Make "offset" point to past the '/', and add a '/' at the end */ + offset++; + cwd[len++] = '/'; + cwd[len] = 0; + return cwd + offset; +} + +static const char *setup_bare_git_dir(const char *work_tree_env, + int offset, int len, char *cwd, int *nongit_ok) +{ + int root_len; + + inside_git_dir = 1; + if (!work_tree_env) + inside_work_tree = 0; + if (offset != len) { + if (chdir(cwd)) + die_errno("Cannot come back to cwd"); + root_len = offset_1st_component(cwd); + cwd[offset > root_len ? 
offset : root_len] = '\0'; + set_git_dir(cwd); + } else + set_git_dir("."); + check_repository_format_gently(nongit_ok); + return NULL; +} + +static const char *setup_nongit(const char *cwd, int *nongit_ok) +{ + if (!nongit_ok) + die("Not a git repository (or any of the parent directories): %s", DEFAULT_GIT_DIR_ENVIRONMENT); + if (chdir(cwd)) + die_errno("Cannot come back to cwd"); + *nongit_ok = 1; + return NULL; +} + +static dev_t get_device_or_die(const char *path, const char *prefix) +{ + struct stat buf; + if (stat(path, &buf)) + die_errno("failed to stat '%s%s%s'", + prefix ? prefix : "", + prefix ? "/" : "", path); + return buf.st_dev; +} + /* * We cannot decide in this function whether we are in the work tree or * not, since the config can only be read _after_ this function was called. */ -const char *setup_git_directory_gently(int *nongit_ok) +static const char *setup_git_directory_gently_1(int *nongit_ok) { const char *work_tree_env = getenv(GIT_WORK_TREE_ENVIRONMENT); const char *env_ceiling_dirs = getenv(CEILING_DIRECTORIES_ENVIRONMENT); static char cwd[PATH_MAX+1]; const char *gitdirenv; const char *gitfile_dir; - int len, offset, ceil_offset, root_len; + int len, offset, ceil_offset; dev_t current_device = 0; int one_filesystem = 1; - struct stat buf; /* * Let's assume that we are in a git repository. @@ -343,38 +451,8 @@ const char *setup_git_directory_gently(int *nongit_ok) * validation. 
*/ gitdirenv = getenv(GIT_DIR_ENVIRONMENT); - if (gitdirenv) { - if (PATH_MAX - 40 < strlen(gitdirenv)) - die("'$%s' too big", GIT_DIR_ENVIRONMENT); - if (is_git_directory(gitdirenv)) { - static char buffer[1024 + 1]; - const char *retval; - - if (!work_tree_env) { - retval = set_work_tree(gitdirenv); - /* config may override worktree */ - if (check_repository_format_gently(nongit_ok)) - return NULL; - return retval; - } - if (check_repository_format_gently(nongit_ok)) - return NULL; - retval = get_relative_cwd(buffer, sizeof(buffer) - 1, - get_git_work_tree()); - if (!retval || !*retval) - return NULL; - set_git_dir(make_absolute_path(gitdirenv)); - if (chdir(work_tree_env) < 0) - die_errno ("Could not chdir to '%s'", work_tree_env); - strcat(buffer, "/"); - return retval; - } - if (nongit_ok) { - *nongit_ok = 1; - return NULL; - } - die("Not a git repository: '%s'", gitdirenv); - } + if (gitdirenv) + return setup_explicit_git_dir(gitdirenv, work_tree_env, nongit_ok); if (!getcwd(cwd, sizeof(cwd)-1)) die_errno("Unable to read current working directory"); @@ -396,49 +474,21 @@ const char *setup_git_directory_gently(int *nongit_ok) */ offset = len = strlen(cwd); one_filesystem = !git_env_bool("GIT_DISCOVERY_ACROSS_FILESYSTEM", 0); - if (one_filesystem) { - if (stat(".", &buf)) - die_errno("failed to stat '.'"); - current_device = buf.st_dev; - } + if (one_filesystem) + current_device = get_device_or_die(".", NULL); for (;;) { - gitfile_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT); - if (gitfile_dir) { - if (set_git_dir(gitfile_dir)) - die("Repository setup failed"); - break; - } - if (is_git_directory(DEFAULT_GIT_DIR_ENVIRONMENT)) - break; - if (is_git_directory(".")) { - inside_git_dir = 1; - if (!work_tree_env) - inside_work_tree = 0; - if (offset != len) { - root_len = offset_1st_component(cwd); - cwd[offset > root_len ? 
offset : root_len] = '\0'; - set_git_dir(cwd); - } else - set_git_dir("."); - check_repository_format_gently(nongit_ok); - return NULL; - } + if (cwd_contains_git_dir(&gitfile_dir)) + return setup_discovered_git_dir(work_tree_env, offset, + len, cwd, nongit_ok); + if (is_git_directory(".")) + return setup_bare_git_dir(work_tree_env, offset, + len, cwd, nongit_ok); while (--offset > ceil_offset && cwd[offset] != '/'); - if (offset <= ceil_offset) { - if (nongit_ok) { - if (chdir(cwd)) - die_errno("Cannot come back to cwd"); - *nongit_ok = 1; - return NULL; - } - die("Not a git repository (or any of the parent directories): %s", DEFAULT_GIT_DIR_ENVIRONMENT); - } + if (offset <= ceil_offset) + return setup_nongit(cwd, nongit_ok); if (one_filesystem) { - if (stat("..", &buf)) { - cwd[offset] = '\0'; - die_errno("failed to stat '%s/..'", cwd); - } - if (buf.st_dev != current_device) { + dev_t parent_device = get_device_or_die("..", cwd); + if (parent_device != current_device) { if (nongit_ok) { if (chdir(cwd)) die_errno("Cannot come back to cwd"); @@ -455,22 +505,16 @@ const char *setup_git_directory_gently(int *nongit_ok) die_errno("Cannot change to '%s/..'", cwd); } } +} - inside_git_dir = 0; - if (!work_tree_env) - inside_work_tree = 1; - root_len = offset_1st_component(cwd); - git_work_tree_cfg = xstrndup(cwd, offset > root_len ? 
offset : root_len); - if (check_repository_format_gently(nongit_ok)) - return NULL; - if (offset == len) - return NULL; +const char *setup_git_directory_gently(int *nongit_ok) +{ + const char *prefix; - /* Make "offset" point to past the '/', and add a '/' at the end */ - offset++; - cwd[len++] = '/'; - cwd[len] = 0; - return cwd + offset; + prefix = setup_git_directory_gently_1(nongit_ok); + if (startup_info) + startup_info->have_repository = !nongit_ok || !*nongit_ok; + return prefix; } int git_config_perm(const char *var, const char *value) diff --git a/sha1_file.c b/sha1_file.c index e42ef96d45..0cd9435619 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2086,6 +2086,7 @@ void *read_sha1_file_repl(const unsigned char *sha1, { const unsigned char *repl = lookup_replace_object(sha1); void *data = read_object(repl, type, size); + char *path; /* die if we replaced an object with one that does not exist */ if (!data && repl != sha1) @@ -2093,8 +2094,16 @@ void *read_sha1_file_repl(const unsigned char *sha1, sha1_to_hex(repl), sha1_to_hex(sha1)); /* legacy behavior is to die on corrupted objects */ - if (!data && (has_loose_object(repl) || has_packed_and_bad(repl))) - die("object %s is corrupted", sha1_to_hex(repl)); + if (!data) { + if (has_loose_object(repl)) { + path = sha1_file_name(sha1); + die("loose object %s (stored in %s) is corrupted", sha1_to_hex(repl), path); + } + if (has_packed_and_bad(repl)) { + path = sha1_pack_name(sha1); + die("packed object %s (stored in %s) is corrupted", sha1_to_hex(repl), path); + } + } if (replacement) *replacement = repl; diff --git a/sha1_name.c b/sha1_name.c index 13209dc5b8..7b7e61719f 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -659,6 +659,16 @@ static int get_sha1_1(const char *name, int len, unsigned char *sha1) return get_short_sha1(name, len, sha1, 0); } +/* + * This interprets names like ':/Initial revision of "git"' by searching + * through history and returning the first commit whose message starts + * the given 
regular expression. + * + * For future extension, ':/!' is reserved. If you want to match a message + * beginning with a '!', you have to repeat the exclamation mark. + */ +#define ONELINE_SEEN (1u<<20) + static int handle_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data) { @@ -674,19 +684,10 @@ static int handle_one_ref(const char *path, if (object->type != OBJ_COMMIT) return 0; insert_by_date((struct commit *)object, list); + object->flags |= ONELINE_SEEN; return 0; } -/* - * This interprets names like ':/Initial revision of "git"' by searching - * through history and returning the first commit whose message matches - * the given regular expression. - * - * For future extension, ':/!' is reserved. If you want to match a message - * beginning with a '!', you have to repeat the exclamation mark. - */ - -#define ONELINE_SEEN (1u<<20) static int get_sha1_oneline(const char *prefix, unsigned char *sha1) { struct commit_list *list = NULL, *backup = NULL, *l; @@ -47,7 +47,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth, { int i = 0, cur_depth = 0; struct commit_list *result = NULL; - struct object_array stack = {0, 0, NULL}; + struct object_array stack = OBJECT_ARRAY_INIT; struct commit *commit = NULL; while (commit || i < heads->nr || stack.nr) { diff --git a/string-list.h b/string-list.h index 680d600d16..494693898b 100644 --- a/string-list.h +++ b/string-list.h @@ -12,6 +12,9 @@ struct string_list unsigned int strdup_strings:1; }; +#define STRING_LIST_INIT_NODUP { NULL, 0, 0, 0 } +#define STRING_LIST_INIT_DUP { NULL, 0, 0, 1 } + void print_string_list(const struct string_list *p, const char *text); void string_list_clear(struct string_list *list, int free_util); @@ -20,10 +23,12 @@ void string_list_clear(struct string_list *list, int free_util); typedef void (*string_list_clear_func_t)(void *p, const char *str); void string_list_clear_func(struct string_list *list, string_list_clear_func_t clearfunc); -/* 
Use this function to iterate over each item */ +/* Use this function or the macro below to iterate over each item */ typedef int (*string_list_each_func_t)(struct string_list_item *, void *); int for_each_string_list(struct string_list *list, string_list_each_func_t, void *cb_data); +#define for_each_string_list_item(item,list) \ + for (item = (list)->items; item < (list)->items + (list)->nr; ++item) /* Use these functions only on sorted lists: */ int string_list_has_string(const struct string_list *list, const char *string); diff --git a/submodule.c b/submodule.c index 61cb6e21dd..91a4758747 100644 --- a/submodule.c +++ b/submodule.c @@ -6,6 +6,11 @@ #include "revision.h" #include "run-command.h" #include "diffcore.h" +#include "refs.h" +#include "string-list.h" + +struct string_list config_name_for_path; +struct string_list config_ignore_for_name; static int add_submodule_odb(const char *path) { @@ -46,16 +51,90 @@ done: return ret; } +void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, + const char *path) +{ + struct string_list_item *path_option, *ignore_option; + path_option = unsorted_string_list_lookup(&config_name_for_path, path); + if (path_option) { + ignore_option = unsorted_string_list_lookup(&config_ignore_for_name, path_option->util); + if (ignore_option) + handle_ignore_submodules_arg(diffopt, ignore_option->util); + } +} + +static int submodule_config(const char *var, const char *value, void *cb) +{ + if (!prefixcmp(var, "submodule.")) + return parse_submodule_config_option(var, value); + return 0; +} + +void gitmodules_config(void) +{ + const char *work_tree = get_git_work_tree(); + if (work_tree) { + struct strbuf gitmodules_path = STRBUF_INIT; + strbuf_addstr(&gitmodules_path, work_tree); + strbuf_addstr(&gitmodules_path, "/.gitmodules"); + git_config_from_file(submodule_config, gitmodules_path.buf, NULL); + strbuf_release(&gitmodules_path); + } +} + +int parse_submodule_config_option(const char *var, const char *value) +{ 
+ int len; + struct string_list_item *config; + struct strbuf submodname = STRBUF_INIT; + + var += 10; /* Skip "submodule." */ + + len = strlen(var); + if ((len > 5) && !strcmp(var + len - 5, ".path")) { + strbuf_add(&submodname, var, len - 5); + config = unsorted_string_list_lookup(&config_name_for_path, value); + if (config) + free(config->util); + else + config = string_list_append(&config_name_for_path, xstrdup(value)); + config->util = strbuf_detach(&submodname, NULL); + strbuf_release(&submodname); + } else if ((len > 7) && !strcmp(var + len - 7, ".ignore")) { + if (strcmp(value, "untracked") && strcmp(value, "dirty") && + strcmp(value, "all") && strcmp(value, "none")) { + warning("Invalid parameter \"%s\" for config option \"submodule.%s.ignore\"", value, var); + return 0; + } + + strbuf_add(&submodname, var, len - 7); + config = unsorted_string_list_lookup(&config_ignore_for_name, submodname.buf); + if (config) + free(config->util); + else + config = string_list_append(&config_ignore_for_name, + strbuf_detach(&submodname, NULL)); + strbuf_release(&submodname); + config->util = xstrdup(value); + return 0; + } + return 0; +} + void handle_ignore_submodules_arg(struct diff_options *diffopt, const char *arg) { + DIFF_OPT_CLR(diffopt, IGNORE_SUBMODULES); + DIFF_OPT_CLR(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES); + DIFF_OPT_CLR(diffopt, IGNORE_DIRTY_SUBMODULES); + if (!strcmp(arg, "all")) DIFF_OPT_SET(diffopt, IGNORE_SUBMODULES); else if (!strcmp(arg, "untracked")) DIFF_OPT_SET(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES); else if (!strcmp(arg, "dirty")) DIFF_OPT_SET(diffopt, IGNORE_DIRTY_SUBMODULES); - else + else if (strcmp(arg, "none")) die("bad --ignore-submodules argument: %s", arg); } @@ -218,3 +297,163 @@ unsigned is_submodule_modified(const char *path, int ignore_untracked) strbuf_release(&buf); return dirty_submodule; } + +static int find_first_merges(struct object_array *result, const char *path, + struct commit *a, struct commit *b) +{ + int i, j; + struct 
object_array merges; + struct commit *commit; + int contains_another; + + char merged_revision[42]; + const char *rev_args[] = { "rev-list", "--merges", "--ancestry-path", + "--all", merged_revision, NULL }; + struct rev_info revs; + struct setup_revision_opt rev_opts; + + memset(&merges, 0, sizeof(merges)); + memset(result, 0, sizeof(struct object_array)); + memset(&rev_opts, 0, sizeof(rev_opts)); + + /* get all revisions that merge commit a */ + snprintf(merged_revision, sizeof(merged_revision), "^%s", + sha1_to_hex(a->object.sha1)); + init_revisions(&revs, NULL); + rev_opts.submodule = path; + setup_revisions(sizeof(rev_args)/sizeof(char *)-1, rev_args, &revs, &rev_opts); + + /* save all revisions from the above list that contain b */ + if (prepare_revision_walk(&revs)) + die("revision walk setup failed"); + while ((commit = get_revision(&revs)) != NULL) { + struct object *o = &(commit->object); + if (in_merge_bases(b, &commit, 1)) + add_object_array(o, NULL, &merges); + } + + /* Now we've got all merges that contain a and b. Prune all + * merges that contain another found merge and save them in + * result. 
+ */ + for (i = 0; i < merges.nr; i++) { + struct commit *m1 = (struct commit *) merges.objects[i].item; + + contains_another = 0; + for (j = 0; j < merges.nr; j++) { + struct commit *m2 = (struct commit *) merges.objects[j].item; + if (i != j && in_merge_bases(m2, &m1, 1)) { + contains_another = 1; + break; + } + } + + if (!contains_another) + add_object_array(merges.objects[i].item, + merges.objects[i].name, result); + } + + free(merges.objects); + return result->nr; +} + +static void print_commit(struct commit *commit) +{ + struct strbuf sb = STRBUF_INIT; + struct pretty_print_context ctx = {0}; + ctx.date_mode = DATE_NORMAL; + format_commit_message(commit, " %h: %m %s", &sb, &ctx); + fprintf(stderr, "%s\n", sb.buf); + strbuf_release(&sb); +} + +#define MERGE_WARNING(path, msg) \ + warning("Failed to merge submodule %s (%s)", path, msg); + +int merge_submodule(unsigned char result[20], const char *path, + const unsigned char base[20], const unsigned char a[20], + const unsigned char b[20]) +{ + struct commit *commit_base, *commit_a, *commit_b; + int parent_count; + struct object_array merges; + + int i; + + /* store a in result in case we fail */ + hashcpy(result, a); + + /* we can not handle deletion conflicts */ + if (is_null_sha1(base)) + return 0; + if (is_null_sha1(a)) + return 0; + if (is_null_sha1(b)) + return 0; + + if (add_submodule_odb(path)) { + MERGE_WARNING(path, "not checked out"); + return 0; + } + + if (!(commit_base = lookup_commit_reference(base)) || + !(commit_a = lookup_commit_reference(a)) || + !(commit_b = lookup_commit_reference(b))) { + MERGE_WARNING(path, "commits not present"); + return 0; + } + + /* check whether both changes are forward */ + if (!in_merge_bases(commit_base, &commit_a, 1) || + !in_merge_bases(commit_base, &commit_b, 1)) { + MERGE_WARNING(path, "commits don't follow merge-base"); + return 0; + } + + /* Case #1: a is contained in b or vice versa */ + if (in_merge_bases(commit_a, &commit_b, 1)) { + hashcpy(result, b); + 
return 1; + } + if (in_merge_bases(commit_b, &commit_a, 1)) { + hashcpy(result, a); + return 1; + } + + /* + * Case #2: There are one or more merges that contain a and b in + * the submodule. If there is only one, then present it as a + * suggestion to the user, but leave it marked unmerged so the + * user needs to confirm the resolution. + */ + + /* find commit which merges them */ + parent_count = find_first_merges(&merges, path, commit_a, commit_b); + switch (parent_count) { + case 0: + MERGE_WARNING(path, "merge following commits not found"); + break; + + case 1: + MERGE_WARNING(path, "not fast-forward"); + fprintf(stderr, "Found a possible merge resolution " + "for the submodule:\n"); + print_commit((struct commit *) merges.objects[0].item); + fprintf(stderr, + "If this is correct simply add it to the index " + "for example\n" + "by using:\n\n" + " git update-index --cacheinfo 160000 %s \"%s\"\n\n" + "which will accept this suggestion.\n", + sha1_to_hex(merges.objects[0].item->sha1), path); + break; + + default: + MERGE_WARNING(path, "multiple merges found"); + for (i = 0; i < merges.nr; i++) + print_commit((struct commit *) merges.objects[i].item); + } + + free(merges.objects); + return 0; +} diff --git a/submodule.h b/submodule.h index 6fd3bb4070..386f410a66 100644 --- a/submodule.h +++ b/submodule.h @@ -3,11 +3,17 @@ struct diff_options; +void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, + const char *path); +void gitmodules_config(); +int parse_submodule_config_option(const char *var, const char *value); void handle_ignore_submodules_arg(struct diff_options *diffopt, const char *); void show_submodule_summary(FILE *f, const char *path, unsigned char one[20], unsigned char two[20], unsigned dirty_submodule, const char *del, const char *add, const char *reset); unsigned is_submodule_modified(const char *path, int ignore_untracked); +int merge_submodule(unsigned char result[20], const char *path, const unsigned char base[20], + const 
unsigned char a[20], const unsigned char b[20]); #endif diff --git a/t/.gitignore b/t/.gitignore index 7dcbb232cd..4e731dc1e3 100644 --- a/t/.gitignore +++ b/t/.gitignore @@ -1,2 +1,3 @@ /trash directory* /test-results +/.prove diff --git a/t/Makefile b/t/Makefile index cf5f9e2e1e..819b936870 100644 --- a/t/Makefile +++ b/t/Makefile @@ -30,6 +30,7 @@ clean: $(RM) -r 'trash directory'.* test-results $(RM) t????/cvsroot/CVSROOT/?* $(RM) -r valgrind/bin + $(RM) .prove aggregate-results-and-cleanup: $(T) $(MAKE) aggregate-results @@ -467,6 +467,13 @@ library for your script to use. <expected> file. This behaves like "cmp" but produces more helpful output when the test is run with "-v" option. + - test_path_is_file <file> [<diagnosis>] + test_path_is_dir <dir> [<diagnosis>] + test_path_is_missing <path> [<diagnosis>] + + Check whether a file/directory exists or doesn't. <diagnosis> will + be displayed if the test fails. + - test_when_finished <script> Prepend <script> to a list of commands to run to clean up diff --git a/t/lib-git-svn.sh b/t/lib-git-svn.sh index c3f6676ca2..92d6d31942 100644 --- a/t/lib-git-svn.sh +++ b/t/lib-git-svn.sh @@ -16,7 +16,6 @@ fi GIT_DIR=$PWD/.git GIT_SVN_DIR=$GIT_DIR/svn/refs/remotes/git-svn SVN_TREE=$GIT_SVN_DIR/svn-tree -PERL=${PERL:-perl} svn >/dev/null 2>&1 if test $? 
-ne 1 @@ -30,7 +29,7 @@ export svnrepo svnconf=$PWD/svnconf export svnconf -$PERL -w -e " +"$PERL_PATH" -w -e " use SVN::Core; use SVN::Repos; \$SVN::Core::VERSION gt '1.1.0' or exit(42); @@ -130,7 +129,7 @@ stop_httpd () { } convert_to_rev_db () { - $PERL -w -- - "$@" <<\EOF + "$PERL_PATH" -w -- - "$@" <<\EOF use strict; @ARGV == 2 or die "Usage: convert_to_rev_db <input> <output>"; open my $wr, '+>', $ARGV[1] or die "$!: couldn't open: $ARGV[1]"; diff --git a/t/lib-rebase.sh b/t/lib-rebase.sh index 6aefe27593..6ccf797091 100644 --- a/t/lib-rebase.sh +++ b/t/lib-rebase.sh @@ -47,6 +47,8 @@ for line in $FAKE_LINES; do case $line in squash|fixup|edit|reword) action="$line";; + exec*) + echo "$line" | sed 's/_/ /g' >> "$1";; "#") echo '# comment' >> "$1";; ">") diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh index 53bd7fcc4a..de38c7f7aa 100755 --- a/t/t0003-attributes.sh +++ b/t/t0003-attributes.sh @@ -48,11 +48,11 @@ test_expect_success 'attribute test' ' attr_check a/b/g a/b/g && attr_check b/g unspecified && attr_check a/b/h a/b/h && - attr_check a/b/d/g "a/b/d/*" - attr_check onoff unset - attr_check offon set - attr_check no unspecified - attr_check a/b/d/no "a/b/d/*" + attr_check a/b/d/g "a/b/d/*" && + attr_check onoff unset && + attr_check offon set && + attr_check no unspecified && + attr_check a/b/d/no "a/b/d/*" && attr_check a/b/d/yes unspecified ' diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh new file mode 100755 index 0000000000..d3225ada68 --- /dev/null +++ b/t/t0080-vcs-svn.sh @@ -0,0 +1,171 @@ +#!/bin/sh + +test_description='check infrastructure for svn importer' + +. 
./test-lib.sh +uint32_max=4294967295 + +test_expect_success 'obj pool: store data' ' + cat <<-\EOF >expected && + 0 + 1 + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 16 + set one 13 + test one 13 + reset one + EOF + test_cmp expected actual +' + +test_expect_success 'obj pool: NULL is offset ~0' ' + echo "$uint32_max" >expected && + echo null one | test-obj-pool >actual && + test_cmp expected actual +' + +test_expect_success 'obj pool: out-of-bounds access' ' + cat <<-EOF >expected && + 0 + 0 + $uint32_max + $uint32_max + 16 + 20 + $uint32_max + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 16 + alloc two 16 + offset one 20 + offset two 20 + alloc one 5 + offset one 20 + free one 1 + offset one 20 + reset one + reset two + EOF + test_cmp expected actual +' + +test_expect_success 'obj pool: high-water mark' ' + cat <<-\EOF >expected && + 0 + 0 + 10 + 20 + 20 + 20 + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 10 + committed one + alloc one 10 + commit one + committed one + alloc one 10 + free one 20 + committed one + reset one + EOF + test_cmp expected actual +' + +test_expect_success 'line buffer' ' + echo HELLO >expected1 && + printf "%s\n" "" HELLO >expected2 && + echo >expected3 && + printf "%s\n" "" Q | q_to_nul >expected4 && + printf "%s\n" foo "" >expected5 && + printf "%s\n" "" foo >expected6 && + + test-line-buffer <<-\EOF >actual1 && + 5 + HELLO + EOF + + test-line-buffer <<-\EOF >actual2 && + 0 + + 5 + HELLO + EOF + + q_to_nul <<-\EOF | + 1 + Q + EOF + test-line-buffer >actual3 && + + q_to_nul <<-\EOF | + 0 + + 1 + Q + EOF + test-line-buffer >actual4 && + + test-line-buffer <<-\EOF >actual5 && + 5 + foo + EOF + + test-line-buffer <<-\EOF >actual6 && + 0 + + 5 + foo + EOF + + test_cmp expected1 actual1 && + test_cmp expected2 actual2 && + test_cmp expected3 actual3 && + test_cmp expected4 actual4 && + test_cmp expected5 actual5 && + test_cmp expected6 actual6 +' + +test_expect_success 'string pool' ' + echo a does not equal b 
>expected.differ && + echo a equals a >expected.match && + echo equals equals equals >expected.matchmore && + + test-string-pool "a,--b" >actual.differ && + test-string-pool "a,a" >actual.match && + test-string-pool "equals-equals" >actual.matchmore && + test_must_fail test-string-pool a,a,a && + test_must_fail test-string-pool a && + + test_cmp expected.differ actual.differ && + test_cmp expected.match actual.match && + test_cmp expected.matchmore actual.matchmore +' + +test_expect_success 'treap sort' ' + cat <<-\EOF >unsorted && + 68 + 12 + 13 + 13 + 68 + 13 + 13 + 21 + 10 + 11 + 12 + 13 + 13 + EOF + sort unsorted >expected && + + test-treap <unsorted >actual && + test_cmp expected actual +' + +test_done diff --git a/t/t1001-read-tree-m-2way.sh b/t/t1001-read-tree-m-2way.sh index 0c562bb820..93ca84f9e6 100755 --- a/t/t1001-read-tree-m-2way.sh +++ b/t/t1001-read-tree-m-2way.sh @@ -359,7 +359,7 @@ test_expect_success \ test_expect_success \ 'a/b (untracked) vs a, plus c/d case test.' \ - '! git read-tree -u -m "$treeH" "$treeM" && + 'test_must_fail git read-tree -u -m "$treeH" "$treeM" && git ls-files --stage && test -f a/b' diff --git a/t/t1011-read-tree-sparse-checkout.sh b/t/t1011-read-tree-sparse-checkout.sh index 62246dbf95..9a07de1a5b 100755 --- a/t/t1011-read-tree-sparse-checkout.sh +++ b/t/t1011-read-tree-sparse-checkout.sh @@ -1,16 +1,30 @@ #!/bin/sh -test_description='sparse checkout tests' +test_description='sparse checkout tests + +* (tag: removed, master) removed +| D sub/added +* (HEAD, tag: top) modified and added +| M init.t +| A sub/added +* (tag: init) init + A init.t +' . 
./test-lib.sh -cat >expected <<EOF -100644 77f0ba1734ed79d12881f81b36ee134de6a3327b 0 init.t -100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 sub/added -EOF test_expect_success 'setup' ' + cat >expected <<-\EOF && + 100644 77f0ba1734ed79d12881f81b36ee134de6a3327b 0 init.t + 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 sub/added + EOF + cat >expected.swt <<-\EOF && + H init.t + H sub/added + EOF + test_commit init && - echo modified >> init.t && + echo modified >>init.t && mkdir sub && touch sub/added && git add init.t sub/added && @@ -20,26 +34,22 @@ test_expect_success 'setup' ' git commit -m removed && git tag removed && git checkout top && - git ls-files --stage > result && + git ls-files --stage >result && test_cmp expected result ' -cat >expected.swt <<EOF -H init.t -H sub/added -EOF test_expect_success 'read-tree without .git/info/sparse-checkout' ' git read-tree -m -u HEAD && - git ls-files --stage > result && + git ls-files --stage >result && test_cmp expected result && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result ' test_expect_success 'read-tree with .git/info/sparse-checkout but disabled' ' - echo > .git/info/sparse-checkout + echo >.git/info/sparse-checkout git read-tree -m -u HEAD && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result && test -f init.t && test -f sub/added @@ -47,9 +57,9 @@ test_expect_success 'read-tree with .git/info/sparse-checkout but disabled' ' test_expect_success 'read-tree --no-sparse-checkout with empty .git/info/sparse-checkout and enabled' ' git config core.sparsecheckout true && - echo > .git/info/sparse-checkout && + echo >.git/info/sparse-checkout && git read-tree --no-sparse-checkout -m -u HEAD && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result && test -f init.t && test -f sub/added @@ -57,94 +67,113 @@ test_expect_success 'read-tree --no-sparse-checkout with empty .git/info/sparse- 
test_expect_success 'read-tree with empty .git/info/sparse-checkout' ' git config core.sparsecheckout true && - echo > .git/info/sparse-checkout && + echo >.git/info/sparse-checkout && test_must_fail git read-tree -m -u HEAD && - git ls-files --stage > result && + git ls-files --stage >result && test_cmp expected result && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result && test -f init.t && test -f sub/added ' -cat >expected.swt <<EOF -S init.t -H sub/added -EOF test_expect_success 'match directories with trailing slash' ' + cat >expected.swt-noinit <<-\EOF && + S init.t + H sub/added + EOF + echo sub/ > .git/info/sparse-checkout && git read-tree -m -u HEAD && git ls-files -t > result && - test_cmp expected.swt result && + test_cmp expected.swt-noinit result && test ! -f init.t && test -f sub/added ' -cat >expected.swt <<EOF -H init.t -H sub/added -EOF test_expect_failure 'match directories without trailing slash' ' - echo init.t > .git/info/sparse-checkout && - echo sub >> .git/info/sparse-checkout && + echo init.t >.git/info/sparse-checkout && + echo sub >>.git/info/sparse-checkout && git read-tree -m -u HEAD && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result && test ! -f init.t && test -f sub/added ' -cat >expected.swt <<EOF -H init.t -S sub/added -EOF test_expect_success 'checkout area changes' ' - echo init.t > .git/info/sparse-checkout && + cat >expected.swt-nosub <<-\EOF && + H init.t + S sub/added + EOF + + echo init.t >.git/info/sparse-checkout && git read-tree -m -u HEAD && - git ls-files -t > result && - test_cmp expected.swt result && + git ls-files -t >result && + test_cmp expected.swt-nosub result && test -f init.t && test ! -f sub/added ' test_expect_success 'read-tree updates worktree, absent case' ' - echo sub/added > .git/info/sparse-checkout && + echo sub/added >.git/info/sparse-checkout && git checkout -f top && git read-tree -m -u HEAD^ && test ! 
-f init.t ' test_expect_success 'read-tree updates worktree, dirty case' ' - echo sub/added > .git/info/sparse-checkout && + echo sub/added >.git/info/sparse-checkout && git checkout -f top && - echo dirty > init.t && + echo dirty >init.t && git read-tree -m -u HEAD^ && grep -q dirty init.t && rm init.t ' test_expect_success 'read-tree removes worktree, dirty case' ' - echo init.t > .git/info/sparse-checkout && + echo init.t >.git/info/sparse-checkout && git checkout -f top && - echo dirty > added && + echo dirty >added && git read-tree -m -u HEAD^ && grep -q dirty added ' test_expect_success 'read-tree adds to worktree, absent case' ' - echo init.t > .git/info/sparse-checkout && + echo init.t >.git/info/sparse-checkout && git checkout -f removed && git read-tree -u -m HEAD^ && test ! -f sub/added ' test_expect_success 'read-tree adds to worktree, dirty case' ' - echo init.t > .git/info/sparse-checkout && + echo init.t >.git/info/sparse-checkout && git checkout -f removed && mkdir sub && - echo dirty > sub/added && + echo dirty >sub/added && git read-tree -u -m HEAD^ && grep -q dirty sub/added ' +test_expect_success 'index removal and worktree narrowing at the same time' ' + >empty && + echo init.t >.git/info/sparse-checkout && + echo sub/added >>.git/info/sparse-checkout && + git checkout -f top && + echo init.t >.git/info/sparse-checkout && + git checkout removed && + git ls-files sub/added >result && + test ! 
-f sub/added && + test_cmp empty result +' + +test_expect_success 'read-tree --reset removes outside worktree' ' + >empty && + echo init.t >.git/info/sparse-checkout && + git checkout -f top && + git reset --hard removed && + git ls-files sub/added >result && + test_cmp empty result +' + test_done diff --git a/t/t1300-repo-config.sh b/t/t1300-repo-config.sh index 64f05080b6..074f2f2e3e 100755 --- a/t/t1300-repo-config.sh +++ b/t/t1300-repo-config.sh @@ -707,19 +707,41 @@ test_expect_success 'set --path' ' git config --path path.trailingtilde "foo~" && test_cmp expect .git/config' +if test "${HOME+set}" +then + test_set_prereq HOMEVAR +fi + cat >expect <<EOF $HOME/ /dev/null foo~ EOF -test_expect_success 'get --path' ' +test_expect_success HOMEVAR 'get --path' ' git config --get --path path.home > result && git config --get --path path.normal >> result && git config --get --path path.trailingtilde >> result && test_cmp expect result ' +cat >expect <<\EOF +/dev/null +foo~ +EOF + +test_expect_success 'get --path copes with unset $HOME' ' + ( + unset HOME; + test_must_fail git config --get --path path.home \ + >result 2>msg && + git config --get --path path.normal >>result && + git config --get --path path.trailingtilde >>result + ) && + grep "[Ff]ailed to expand.*~/" msg && + test_cmp expect result +' + rm .git/config git config quote.leading " test" diff --git a/t/t1402-check-ref-format.sh b/t/t1402-check-ref-format.sh index eb45afb018..782e75d000 100755 --- a/t/t1402-check-ref-format.sh +++ b/t/t1402-check-ref-format.sh @@ -41,6 +41,23 @@ test_expect_success "check-ref-format --branch @{-1}" ' refname2=$(git check-ref-format --branch @{-2}) && test "$refname2" = master' +test_expect_success 'check-ref-format --branch from subdir' ' + mkdir subdir && + + T=$(git write-tree) && + sha1=$(echo A | git commit-tree $T) && + git update-ref refs/heads/master $sha1 && + git update-ref refs/remotes/origin/master $sha1 + git checkout master && + git checkout origin/master && + 
git checkout master && + refname=$( + cd subdir && + git check-ref-format --branch @{-1} + ) && + test "$refname" = "$sha1" +' + valid_ref_normalized() { test_expect_success "ref name '$1' simplifies to '$2'" " refname=\$(git check-ref-format --print '$1') && diff --git a/t/t1501-worktree.sh b/t/t1501-worktree.sh index bd8b60732b..2c8f01f668 100755 --- a/t/t1501-worktree.sh +++ b/t/t1501-worktree.sh @@ -3,183 +3,320 @@ test_description='test separate work tree' . ./test-lib.sh -test_rev_parse() { - name=$1 - shift - - test_expect_success "$name: is-bare-repository" \ - "test '$1' = \"\$(git rev-parse --is-bare-repository)\"" - shift - [ $# -eq 0 ] && return - - test_expect_success "$name: is-inside-git-dir" \ - "test '$1' = \"\$(git rev-parse --is-inside-git-dir)\"" - shift - [ $# -eq 0 ] && return - - test_expect_success "$name: is-inside-work-tree" \ - "test '$1' = \"\$(git rev-parse --is-inside-work-tree)\"" - shift - [ $# -eq 0 ] && return - - test_expect_success "$name: prefix" \ - "test '$1' = \"\$(git rev-parse --show-prefix)\"" - shift - [ $# -eq 0 ] && return -} - -EMPTY_TREE=$(git write-tree) -mkdir -p work/sub/dir || exit 1 -mkdir -p work2 || exit 1 -mv .git repo.git || exit 1 - -say "core.worktree = relative path" -GIT_DIR=repo.git -GIT_CONFIG="$(pwd)"/$GIT_DIR/config -export GIT_DIR GIT_CONFIG -unset GIT_WORK_TREE -git config core.worktree ../work -test_rev_parse 'outside' false false false -cd work || exit 1 -GIT_DIR=../repo.git -GIT_CONFIG="$(pwd)"/$GIT_DIR/config -test_rev_parse 'inside' false false true '' -cd sub/dir || exit 1 -GIT_DIR=../../../repo.git -GIT_CONFIG="$(pwd)"/$GIT_DIR/config -test_rev_parse 'subdirectory' false false true sub/dir/ -cd ../../.. 
|| exit 1 - -say "core.worktree = absolute path" -GIT_DIR=$(pwd)/repo.git -GIT_CONFIG=$GIT_DIR/config -git config core.worktree "$(pwd)/work" -test_rev_parse 'outside' false false false -cd work2 -test_rev_parse 'outside2' false false false -cd ../work || exit 1 -test_rev_parse 'inside' false false true '' -cd sub/dir || exit 1 -test_rev_parse 'subdirectory' false false true sub/dir/ -cd ../../.. || exit 1 - -say "GIT_WORK_TREE=relative path (override core.worktree)" -GIT_DIR=$(pwd)/repo.git -GIT_CONFIG=$GIT_DIR/config -git config core.worktree non-existent -GIT_WORK_TREE=work -export GIT_WORK_TREE -test_rev_parse 'outside' false false false -cd work2 -test_rev_parse 'outside' false false false -cd ../work || exit 1 -GIT_WORK_TREE=. -test_rev_parse 'inside' false false true '' -cd sub/dir || exit 1 -GIT_WORK_TREE=../.. -test_rev_parse 'subdirectory' false false true sub/dir/ -cd ../../.. || exit 1 - -mv work repo.git/work -mv work2 repo.git/work2 - -say "GIT_WORK_TREE=absolute path, work tree below git dir" -GIT_DIR=$(pwd)/repo.git -GIT_CONFIG=$GIT_DIR/config -GIT_WORK_TREE=$(pwd)/repo.git/work -test_rev_parse 'outside' false false false -cd repo.git || exit 1 -test_rev_parse 'in repo.git' false true false -cd objects || exit 1 -test_rev_parse 'in repo.git/objects' false true false -cd ../work2 || exit 1 -test_rev_parse 'in repo.git/work2' false true false -cd ../work || exit 1 -test_rev_parse 'in repo.git/work' false true true '' -cd sub/dir || exit 1 -test_rev_parse 'in repo.git/sub/dir' false true true sub/dir/ -cd ../../../.. || exit 1 - -test_expect_success 'repo finds its work tree' ' - (cd repo.git && - : > work/sub/dir/untracked && - test sub/dir/untracked = "$(git ls-files --others)") -' - -test_expect_success 'repo finds its work tree from work tree, too' ' - (cd repo.git/work/sub/dir && - : > tracked && - git --git-dir=../../.. add tracked && - cd ../../.. 
&& - test sub/dir/tracked = "$(git ls-files)") +test_expect_success 'setup' ' + EMPTY_TREE=$(git write-tree) && + EMPTY_BLOB=$(git hash-object -t blob --stdin </dev/null) && + CHANGED_BLOB=$(echo changed | git hash-object -t blob --stdin) && + ZEROES=0000000000000000000000000000000000000000 && + EMPTY_BLOB7=$(echo $EMPTY_BLOB | sed "s/\(.......\).*/\1/") && + CHANGED_BLOB7=$(echo $CHANGED_BLOB | sed "s/\(.......\).*/\1/") && + + mkdir -p work/sub/dir && + mkdir -p work2 && + mv .git repo.git +' + +test_expect_success 'setup: helper for testing rev-parse' ' + test_rev_parse() { + echo $1 >expected.bare && + echo $2 >expected.inside-git && + echo $3 >expected.inside-worktree && + if test $# -ge 4 + then + echo $4 >expected.prefix + fi && + + git rev-parse --is-bare-repository >actual.bare && + git rev-parse --is-inside-git-dir >actual.inside-git && + git rev-parse --is-inside-work-tree >actual.inside-worktree && + if test $# -ge 4 + then + git rev-parse --show-prefix >actual.prefix + fi && + + test_cmp expected.bare actual.bare && + test_cmp expected.inside-git actual.inside-git && + test_cmp expected.inside-worktree actual.inside-worktree && + if test $# -ge 4 + then + # rev-parse --show-prefix should output + # a single newline when at the top of the work tree, + # but we test for that separately. + test -z "$4" && ! 
test -s actual.prefix || + test_cmp expected.prefix actual.prefix + fi + } +' + +test_expect_success 'setup: core.worktree = relative path' ' + unset GIT_WORK_TREE; + GIT_DIR=repo.git && + GIT_CONFIG="$(pwd)"/$GIT_DIR/config && + export GIT_DIR GIT_CONFIG && + git config core.worktree ../work +' + +test_expect_success 'outside' ' + test_rev_parse false false false +' + +test_expect_success 'inside work tree' ' + ( + cd work && + GIT_DIR=../repo.git && + GIT_CONFIG="$(pwd)"/$GIT_DIR/config && + test_rev_parse false false true "" + ) +' + +test_expect_failure 'empty prefix is actually written out' ' + echo >expected && + ( + cd work && + GIT_DIR=../repo.git && + GIT_CONFIG="$(pwd)"/$GIT_DIR/config && + git rev-parse --show-prefix >../actual + ) && + test_cmp expected actual +' + +test_expect_success 'subdir of work tree' ' + ( + cd work/sub/dir && + GIT_DIR=../../../repo.git && + GIT_CONFIG="$(pwd)"/$GIT_DIR/config && + test_rev_parse false false true sub/dir/ + ) +' + +test_expect_success 'setup: core.worktree = absolute path' ' + unset GIT_WORK_TREE; + GIT_DIR=$(pwd)/repo.git && + GIT_CONFIG=$GIT_DIR/config && + export GIT_DIR GIT_CONFIG && + git config core.worktree "$(pwd)/work" +' + +test_expect_success 'outside' ' + test_rev_parse false false false && + ( + cd work2 && + test_rev_parse false false false + ) +' + +test_expect_success 'inside work tree' ' + ( + cd work && + test_rev_parse false false true "" + ) +' + +test_expect_success 'subdir of work tree' ' + ( + cd work/sub/dir && + test_rev_parse false false true sub/dir/ + ) +' + +test_expect_success 'setup: GIT_WORK_TREE=relative (override core.worktree)' ' + GIT_DIR=$(pwd)/repo.git && + GIT_CONFIG=$GIT_DIR/config && + git config core.worktree non-existent && + GIT_WORK_TREE=work && + export GIT_DIR GIT_CONFIG GIT_WORK_TREE +' + +test_expect_success 'outside' ' + test_rev_parse false false false && + ( + cd work2 && + test_rev_parse false false false + ) +' + +test_expect_success 'inside work tree' ' + ( 
+ cd work && + GIT_WORK_TREE=. && + test_rev_parse false false true "" + ) +' + +test_expect_success 'subdir of work tree' ' + ( + cd work/sub/dir && + GIT_WORK_TREE=../.. && + test_rev_parse false false true sub/dir/ + ) +' + +test_expect_success 'setup: GIT_WORK_TREE=absolute, below git dir' ' + mv work repo.git/work && + mv work2 repo.git/work2 && + GIT_DIR=$(pwd)/repo.git && + GIT_CONFIG=$GIT_DIR/config && + GIT_WORK_TREE=$(pwd)/repo.git/work && + export GIT_DIR GIT_CONFIG GIT_WORK_TREE +' + +test_expect_success 'outside' ' + echo outside && + test_rev_parse false false false +' + +test_expect_success 'in repo.git' ' + ( + cd repo.git && + test_rev_parse false true false + ) && + ( + cd repo.git/objects && + test_rev_parse false true false + ) && + ( + cd repo.git/work2 && + test_rev_parse false true false + ) +' + +test_expect_success 'inside work tree' ' + ( + cd repo.git/work && + test_rev_parse false true true "" + ) +' + +test_expect_success 'subdir of work tree' ' + ( + cd repo.git/work/sub/dir && + test_rev_parse false true true sub/dir/ + ) +' + +test_expect_success 'find work tree from repo' ' + echo sub/dir/untracked >expected && + cat <<-\EOF >repo.git/work/.gitignore && + expected.* + actual.* + .gitignore + EOF + >repo.git/work/sub/dir/untracked && + ( + cd repo.git && + git ls-files --others --exclude-standard >../actual + ) && + test_cmp expected actual +' + +test_expect_success 'find work tree from work tree' ' + echo sub/dir/tracked >expected && + >repo.git/work/sub/dir/tracked && + ( + cd repo.git/work/sub/dir && + git --git-dir=../../.. add tracked + ) && + ( + cd repo.git && + git ls-files >../actual + ) && + test_cmp expected actual ' test_expect_success '_gently() groks relative GIT_DIR & GIT_WORK_TREE' ' - (cd repo.git/work/sub/dir && - GIT_DIR=../../.. GIT_WORK_TREE=../.. GIT_PAGER= \ + ( + cd repo.git/work/sub/dir && + GIT_DIR=../../.. && + GIT_WORK_TREE=../.. 
&& + GIT_PAGER= && + export GIT_DIR GIT_WORK_TREE GIT_PAGER && + git diff --exit-code tracked && - echo changed > tracked && - ! GIT_DIR=../../.. GIT_WORK_TREE=../.. GIT_PAGER= \ - git diff --exit-code tracked) -' -cat > diff-index-cached.expected <<\EOF -:000000 100644 0000000000000000000000000000000000000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 A sub/dir/tracked -EOF -cat > diff-index.expected <<\EOF -:000000 100644 0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 A sub/dir/tracked -EOF - - -test_expect_success 'git diff-index' ' - GIT_DIR=repo.git GIT_WORK_TREE=repo.git/work git diff-index $EMPTY_TREE > result && - test_cmp diff-index.expected result && - GIT_DIR=repo.git git diff-index --cached $EMPTY_TREE > result && - test_cmp diff-index-cached.expected result -' -cat >diff-files.expected <<\EOF -:100644 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0000000000000000000000000000000000000000 M sub/dir/tracked -EOF - -test_expect_success 'git diff-files' ' - GIT_DIR=repo.git GIT_WORK_TREE=repo.git/work git diff-files > result && - test_cmp diff-files.expected result -' - -cat >diff-TREE.expected <<\EOF -diff --git a/sub/dir/tracked b/sub/dir/tracked -new file mode 100644 -index 0000000..5ea2ed4 ---- /dev/null -+++ b/sub/dir/tracked -@@ -0,0 +1 @@ -+changed -EOF -cat >diff-TREE-cached.expected <<\EOF -diff --git a/sub/dir/tracked b/sub/dir/tracked -new file mode 100644 -index 0000000..e69de29 -EOF -cat >diff-FILES.expected <<\EOF -diff --git a/sub/dir/tracked b/sub/dir/tracked -index e69de29..5ea2ed4 100644 ---- a/sub/dir/tracked -+++ b/sub/dir/tracked -@@ -0,0 +1 @@ -+changed -EOF - -test_expect_success 'git diff' ' - GIT_DIR=repo.git GIT_WORK_TREE=repo.git/work git diff $EMPTY_TREE > result && - test_cmp diff-TREE.expected result && - GIT_DIR=repo.git git diff --cached $EMPTY_TREE > result && - test_cmp diff-TREE-cached.expected result && - GIT_DIR=repo.git GIT_WORK_TREE=repo.git/work git diff > result && - test_cmp 
diff-FILES.expected result + echo changed >tracked && + test_must_fail git diff --exit-code tracked + ) +' + +test_expect_success 'diff-index respects work tree under .git dir' ' + cat >diff-index-cached.expected <<-EOF && + :000000 100644 $ZEROES $EMPTY_BLOB A sub/dir/tracked + EOF + cat >diff-index.expected <<-EOF && + :000000 100644 $ZEROES $ZEROES A sub/dir/tracked + EOF + + ( + GIT_DIR=repo.git && + GIT_WORK_TREE=repo.git/work && + export GIT_DIR GIT_WORK_TREE && + git diff-index $EMPTY_TREE >diff-index.actual && + git diff-index --cached $EMPTY_TREE >diff-index-cached.actual + ) && + test_cmp diff-index.expected diff-index.actual && + test_cmp diff-index-cached.expected diff-index-cached.actual +' + +test_expect_success 'diff-files respects work tree under .git dir' ' + cat >diff-files.expected <<-EOF && + :100644 100644 $EMPTY_BLOB $ZEROES M sub/dir/tracked + EOF + + ( + GIT_DIR=repo.git && + GIT_WORK_TREE=repo.git/work && + export GIT_DIR GIT_WORK_TREE && + git diff-files >diff-files.actual + ) && + test_cmp diff-files.expected diff-files.actual +' + +test_expect_success 'git diff respects work tree under .git dir' ' + cat >diff-TREE.expected <<-EOF && + diff --git a/sub/dir/tracked b/sub/dir/tracked + new file mode 100644 + index 0000000..$CHANGED_BLOB7 + --- /dev/null + +++ b/sub/dir/tracked + @@ -0,0 +1 @@ + +changed + EOF + cat >diff-TREE-cached.expected <<-EOF && + diff --git a/sub/dir/tracked b/sub/dir/tracked + new file mode 100644 + index 0000000..$EMPTY_BLOB7 + EOF + cat >diff-FILES.expected <<-EOF && + diff --git a/sub/dir/tracked b/sub/dir/tracked + index $EMPTY_BLOB7..$CHANGED_BLOB7 100644 + --- a/sub/dir/tracked + +++ b/sub/dir/tracked + @@ -0,0 +1 @@ + +changed + EOF + + ( + GIT_DIR=repo.git && + GIT_WORK_TREE=repo.git/work && + export GIT_DIR GIT_WORK_TREE && + git diff $EMPTY_TREE >diff-TREE.actual && + git diff --cached $EMPTY_TREE >diff-TREE-cached.actual && + git diff >diff-FILES.actual + ) && + test_cmp diff-TREE.expected 
diff-TREE.actual && + test_cmp diff-TREE-cached.expected diff-TREE-cached.actual && + test_cmp diff-FILES.expected diff-FILES.actual ' test_expect_success 'git grep' ' - (cd repo.git/work/sub && - GIT_DIR=../.. GIT_WORK_TREE=.. git grep -l changed | grep dir/tracked) + echo dir/tracked >expected.grep && + ( + cd repo.git/work/sub && + GIT_DIR=../.. && + GIT_WORK_TREE=.. && + export GIT_DIR GIT_WORK_TREE && + git grep -l changed >../../../actual.grep + ) && + test_cmp expected.grep actual.grep ' test_expect_success 'git commit' ' @@ -191,14 +328,14 @@ test_expect_success 'git commit' ' test_expect_success 'absolute pathspec should fail gracefully' ' ( - cd repo.git || exit 1 - git config --unset core.worktree + cd repo.git && + test_might_fail git config --unset core.worktree && test_must_fail git log HEAD -- /home ) ' test_expect_success 'make_relative_path handles double slashes in GIT_DIR' ' - : > dummy_file + >dummy_file echo git --git-dir="$(pwd)//repo.git" --work-tree="$(pwd)" add dummy_file && git --git-dir="$(pwd)//repo.git" --work-tree="$(pwd)" add dummy_file ' diff --git a/t/t2018-checkout-branch.sh b/t/t2018-checkout-branch.sh new file mode 100755 index 0000000000..fa69016381 --- /dev/null +++ b/t/t2018-checkout-branch.sh @@ -0,0 +1,172 @@ +#!/bin/sh + +test_description='checkout ' + +. ./test-lib.sh + +# Arguments: <branch> <sha> [<checkout options>] +# +# Runs "git checkout" to switch to <branch>, testing that +# +# 1) we are on the specified branch, <branch>; +# 2) HEAD is <sha>; if <sha> is not specified, the old HEAD is used. +# +# If <checkout options> is not specified, "git checkout" is run with -b. +do_checkout() { + exp_branch=$1 && + exp_ref="refs/heads/$exp_branch" && + + # if <sha> is not specified, use HEAD. 
+ exp_sha=${2:-$(git rev-parse --verify HEAD)} && + + # default options for git checkout: -b + if [ -z "$3" ]; then + opts="-b" + else + opts="$3" + fi + + git checkout $opts $exp_branch $exp_sha && + + test $exp_ref = $(git rev-parse --symbolic-full-name HEAD) && + test $exp_sha = $(git rev-parse --verify HEAD) +} + +test_dirty_unmergeable() { + ! git diff --exit-code >/dev/null +} + +setup_dirty_unmergeable() { + echo >>file1 change2 +} + +test_dirty_mergeable() { + ! git diff --cached --exit-code >/dev/null +} + +setup_dirty_mergeable() { + echo >file2 file2 && + git add file2 +} + +test_expect_success 'setup' ' + test_commit initial file1 && + HEAD1=$(git rev-parse --verify HEAD) && + + test_commit change1 file1 && + HEAD2=$(git rev-parse --verify HEAD) && + + git branch -m branch1 +' + +test_expect_success 'checkout -b to a new branch, set to HEAD' ' + do_checkout branch2 +' + +test_expect_success 'checkout -b to a new branch, set to an explicit ref' ' + git checkout branch1 && + git branch -D branch2 && + + do_checkout branch2 $HEAD1 +' + +test_expect_success 'checkout -b to a new branch with unmergeable changes fails' ' + git checkout branch1 && + + # clean up from previous test + git branch -D branch2 && + + setup_dirty_unmergeable && + test_must_fail do_checkout branch2 $HEAD1 && + test_dirty_unmergeable +' + +test_expect_success 'checkout -f -b to a new branch with unmergeable changes discards changes' ' + # still dirty and on branch1 + do_checkout branch2 $HEAD1 "-f -b" && + test_must_fail test_dirty_unmergeable +' + +test_expect_success 'checkout -b to a new branch preserves mergeable changes' ' + git checkout branch1 && + + # clean up from previous test + git branch -D branch2 && + + setup_dirty_mergeable && + do_checkout branch2 $HEAD1 && + test_dirty_mergeable +' + +test_expect_success 'checkout -f -b to a new branch with mergeable changes discards changes' ' + # clean up from previous test + git reset --hard && + + git checkout branch1 && + + # 
clean up from previous test + git branch -D branch2 && + + setup_dirty_mergeable && + do_checkout branch2 $HEAD1 "-f -b" && + test_must_fail test_dirty_mergeable +' + +test_expect_success 'checkout -b to an existing branch fails' ' + git reset --hard HEAD && + + test_must_fail do_checkout branch2 $HEAD2 +' + +test_expect_success 'checkout -B to an existing branch resets branch to HEAD' ' + git checkout branch1 && + + do_checkout branch2 "" -B +' + +test_expect_success 'checkout -B to an existing branch from detached HEAD resets branch to HEAD' ' + git checkout $(git rev-parse --verify HEAD) && + + do_checkout branch2 "" -B +' + +test_expect_success 'checkout -B to an existing branch with an explicit ref resets branch to that ref' ' + git checkout branch1 && + + do_checkout branch2 $HEAD1 -B +' + +test_expect_success 'checkout -B to an existing branch with unmergeable changes fails' ' + git checkout branch1 && + + setup_dirty_unmergeable && + test_must_fail do_checkout branch2 $HEAD1 -B && + test_dirty_unmergeable +' + +test_expect_success 'checkout -f -B to an existing branch with unmergeable changes discards changes' ' + # still dirty and on branch1 + do_checkout branch2 $HEAD1 "-f -B" && + test_must_fail test_dirty_unmergeable +' + +test_expect_success 'checkout -B to an existing branch preserves mergeable changes' ' + git checkout branch1 && + + setup_dirty_mergeable && + do_checkout branch2 $HEAD1 -B && + test_dirty_mergeable +' + +test_expect_success 'checkout -f -B to an existing branch with mergeable changes discards changes' ' + # clean up from previous test + git reset --hard && + + git checkout branch1 && + + setup_dirty_mergeable && + do_checkout branch2 $HEAD1 "-f -B" && + test_must_fail test_dirty_mergeable +' + +test_done diff --git a/t/t3030-merge-recursive.sh b/t/t3030-merge-recursive.sh index d541544537..efe2900a37 100755 --- a/t/t3030-merge-recursive.sh +++ b/t/t3030-merge-recursive.sh @@ -294,7 +294,7 @@ test_expect_success 'fail if the index has 
unresolved entries' ' grep "You have not concluded your merge" out && rm -f .git/MERGE_HEAD && test_must_fail git merge "$c5" 2> out && - grep "Your local changes to .* would be overwritten by merge." out + grep "Your local changes to the following files would be overwritten by merge:" out ' test_expect_success 'merge-recursive remove conflict' ' diff --git a/t/t3210-pack-refs.sh b/t/t3210-pack-refs.sh index 525174013c..cd04361df8 100755 --- a/t/t3210-pack-refs.sh +++ b/t/t3210-pack-refs.sh @@ -60,6 +60,12 @@ test_expect_success 'see if git pack-refs --prune remove ref files' ' ! test -f .git/refs/heads/f ' +test_expect_success 'see if git pack-refs --prune removes empty dirs' ' + git branch r/s/t && + git pack-refs --all --prune && + ! test -e .git/refs/heads/r +' + test_expect_success \ 'git branch g should work when git branch g/h has been deleted' \ 'git branch g/h && diff --git a/t/t3301-notes.sh b/t/t3301-notes.sh index 1d82f79ee0..96b75813d7 100755 --- a/t/t3301-notes.sh +++ b/t/t3301-notes.sh @@ -299,7 +299,7 @@ cat expect-F >> expect-rm-F test_expect_success 'verify note removal with -F /dev/null' ' git log -4 > output && test_cmp expect-rm-F output && - ! git notes show + test_must_fail git notes show ' test_expect_success 'do not create empty note with -m "" (setup)' ' @@ -309,7 +309,7 @@ test_expect_success 'do not create empty note with -m "" (setup)' ' test_expect_success 'verify non-creation of note with -m ""' ' git log -4 > output && test_cmp expect-rm-F output && - ! git notes show + test_must_fail git notes show ' cat > expect-combine_m_and_F << EOF @@ -357,7 +357,7 @@ cat expect-multiline >> expect-rm-remove test_expect_success 'verify note removal with "git notes remove"' ' git log -4 > output && test_cmp expect-rm-remove output && - ! 
git notes show HEAD^ + test_must_fail git notes show HEAD^ ' cat > expect << EOF diff --git a/t/t3302-notes-index-expensive.sh b/t/t3302-notes-index-expensive.sh index 361a10aeb1..8ab333dbd9 100755 --- a/t/t3302-notes-index-expensive.sh +++ b/t/t3302-notes-index-expensive.sh @@ -98,7 +98,7 @@ time_notes () { for mode in no-notes notes do echo $mode - /usr/bin/time sh ../time_notes $mode $1 + /usr/bin/time "$SHELL_PATH" ../time_notes $mode $1 done } diff --git a/t/t3306-notes-prune.sh b/t/t3306-notes-prune.sh index b4554041b4..c4282179b3 100755 --- a/t/t3306-notes-prune.sh +++ b/t/t3306-notes-prune.sh @@ -67,7 +67,7 @@ test_expect_success 'remove some commits' ' test_expect_success 'verify that commits are gone' ' - ! git cat-file -p 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && + test_must_fail git cat-file -p 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && git cat-file -p 08341ad9e94faa089d60fd3f523affb25c6da189 && git cat-file -p ab5f302035f2e7aaf04265f08b42034c23256e1f ' @@ -106,7 +106,7 @@ test_expect_success 'prune notes' ' test_expect_success 'verify that notes are gone' ' - ! git notes show 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && + test_must_fail git notes show 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && git notes show 08341ad9e94faa089d60fd3f523affb25c6da189 && git notes show ab5f302035f2e7aaf04265f08b42034c23256e1f ' @@ -130,8 +130,8 @@ test_expect_success 'prune -v notes' ' test_expect_success 'verify that notes are gone' ' - ! git notes show 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && - ! 
git notes show 08341ad9e94faa089d60fd3f523affb25c6da189 && + test_must_fail git notes show 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && + test_must_fail git notes show 08341ad9e94faa089d60fd3f523affb25c6da189 && git notes show ab5f302035f2e7aaf04265f08b42034c23256e1f ' diff --git a/t/t3400-rebase.sh b/t/t3400-rebase.sh index d98c7b5571..349eebd542 100755 --- a/t/t3400-rebase.sh +++ b/t/t3400-rebase.sh @@ -14,140 +14,165 @@ GIT_AUTHOR_NAME=author@name GIT_AUTHOR_EMAIL=bogus@email@address export GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL -test_expect_success \ - 'prepare repository with topic branches' \ - 'git config core.logAllRefUpdates true && - echo First > A && - git update-index --add A && - git commit -m "Add A." && - git checkout -b my-topic-branch && - echo Second > B && - git update-index --add B && - git commit -m "Add B." && - git checkout -f master && - echo Third >> A && - git update-index A && - git commit -m "Modify A." && - git checkout -b side my-topic-branch && - echo Side >> C && - git add C && - git commit -m "Add C" && - git checkout -b nonlinear my-topic-branch && - echo Edit >> B && - git add B && - git commit -m "Modify B" && - git merge side && - git checkout -b upstream-merged-nonlinear && - git merge master && - git checkout -f my-topic-branch && - git tag topic +test_expect_success 'prepare repository with topic branches' ' + git config core.logAllRefUpdates true && + echo First >A && + git update-index --add A && + git commit -m "Add A." && + git checkout -b force-3way && + echo Dummy >Y && + git update-index --add Y && + git commit -m "Add Y." && + git checkout -b filemove && + git reset --soft master && + mkdir D && + git mv A D/A && + git commit -m "Move A." && + git checkout -b my-topic-branch master && + echo Second >B && + git update-index --add B && + git commit -m "Add B." && + git checkout -f master && + echo Third >>A && + git update-index A && + git commit -m "Modify A." 
&& + git checkout -b side my-topic-branch && + echo Side >>C && + git add C && + git commit -m "Add C" && + git checkout -b nonlinear my-topic-branch && + echo Edit >>B && + git add B && + git commit -m "Modify B" && + git merge side && + git checkout -b upstream-merged-nonlinear && + git merge master && + git checkout -f my-topic-branch && + git tag topic ' test_expect_success 'rebase on dirty worktree' ' - echo dirty >> A && - test_must_fail git rebase master' + echo dirty >>A && + test_must_fail git rebase master +' test_expect_success 'rebase on dirty cache' ' - git add A && - test_must_fail git rebase master' + git add A && + test_must_fail git rebase master +' test_expect_success 'rebase against master' ' - git reset --hard HEAD && - git rebase master' + git reset --hard HEAD && + git rebase master +' test_expect_success 'rebase against master twice' ' - git rebase master >out && - grep "Current branch my-topic-branch is up to date" out + git rebase master >out && + grep "Current branch my-topic-branch is up to date" out ' test_expect_success 'rebase against master twice with --force' ' - git rebase --force-rebase master >out && - grep "Current branch my-topic-branch is up to date, rebase forced" out + git rebase --force-rebase master >out && + grep "Current branch my-topic-branch is up to date, rebase forced" out ' test_expect_success 'rebase against master twice from another branch' ' - git checkout my-topic-branch^ && - git rebase master my-topic-branch >out && - grep "Current branch my-topic-branch is up to date" out + git checkout my-topic-branch^ && + git rebase master my-topic-branch >out && + grep "Current branch my-topic-branch is up to date" out ' test_expect_success 'rebase fast-forward to master' ' - git checkout my-topic-branch^ && - git rebase my-topic-branch >out && - grep "Fast-forwarded HEAD to my-topic-branch" out + git checkout my-topic-branch^ && + git rebase my-topic-branch >out && + grep "Fast-forwarded HEAD to my-topic-branch" out ' 
-test_expect_success \ - 'the rebase operation should not have destroyed author information' \ - '! (git log | grep "Author:" | grep "<>")' +test_expect_success 'the rebase operation should not have destroyed author information' ' + ! (git log | grep "Author:" | grep "<>") +' -test_expect_success \ - 'the rebase operation should not have destroyed author information (2)' \ - "git log -1 | grep 'Author: $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL>'" +test_expect_success 'the rebase operation should not have destroyed author information (2)' " + git log -1 | + grep 'Author: $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL>' +" test_expect_success 'HEAD was detached during rebase' ' - test $(git rev-parse HEAD@{1}) != $(git rev-parse my-topic-branch@{1}) + test $(git rev-parse HEAD@{1}) != $(git rev-parse my-topic-branch@{1}) ' test_expect_success 'rebase after merge master' ' - git reset --hard topic && - git merge master && - git rebase master && - ! (git show | grep "^Merge:") + git reset --hard topic && + git merge master && + git rebase master && + ! (git show | grep "^Merge:") ' test_expect_success 'rebase of history with merges is linearized' ' - git checkout nonlinear && - test 4 = $(git rev-list master.. | wc -l) && - git rebase master && - test 3 = $(git rev-list master.. | wc -l) + git checkout nonlinear && + test 4 = $(git rev-list master.. | wc -l) && + git rebase master && + test 3 = $(git rev-list master.. | wc -l) ' -test_expect_success \ - 'rebase of history with merges after upstream merge is linearized' ' - git checkout upstream-merged-nonlinear && - test 5 = $(git rev-list master.. | wc -l) && - git rebase master && - test 3 = $(git rev-list master.. | wc -l) +test_expect_success 'rebase of history with merges after upstream merge is linearized' ' + git checkout upstream-merged-nonlinear && + test 5 = $(git rev-list master.. | wc -l) && + git rebase master && + test 3 = $(git rev-list master.. 
| wc -l) ' test_expect_success 'rebase a single mode change' ' - git checkout master && - echo 1 > X && - git add X && - test_tick && - git commit -m prepare && - git checkout -b modechange HEAD^ && - echo 1 > X && - git add X && - test_chmod +x A && - test_tick && - git commit -m modechange && - GIT_TRACE=1 git rebase master + git checkout master && + echo 1 >X && + git add X && + test_tick && + git commit -m prepare && + git checkout -b modechange HEAD^ && + echo 1 >X && + git add X && + test_chmod +x A && + test_tick && + git commit -m modechange && + GIT_TRACE=1 git rebase master +' + +test_expect_success 'rebase is not broken by diff.renames' ' + git config diff.renames copies && + test_when_finished "git config --unset diff.renames" && + git checkout filemove && + GIT_TRACE=1 git rebase force-3way +' + +test_expect_success 'setup: recover' ' + test_might_fail git rebase --abort && + git reset --hard && + git checkout modechange ' test_expect_success 'Show verbose error when HEAD could not be detached' ' - : > B && - test_must_fail git rebase topic 2> output.err > output.out && - grep "Untracked working tree file .B. would be overwritten" output.err + >B && + test_must_fail git rebase topic 2>output.err >output.out && + grep "The following untracked working tree files would be overwritten by checkout:" output.err && + grep B output.err ' rm -f B test_expect_success 'dump usage when upstream arg is missing' ' - git checkout -b usage topic && - test_must_fail git rebase 2>error1 && - grep "[Uu]sage" error1 && - test_must_fail git rebase --abort 2>error2 && - grep "No rebase in progress" error2 && - test_must_fail git rebase --onto master 2>error3 && - grep "[Uu]sage" error3 && - ! 
grep "can.t shift" error3 + git checkout -b usage topic && + test_must_fail git rebase 2>error1 && + grep "[Uu]sage" error1 && + test_must_fail git rebase --abort 2>error2 && + grep "No rebase in progress" error2 && + test_must_fail git rebase --onto master 2>error3 && + grep "[Uu]sage" error3 && + ! grep "can.t shift" error3 ' test_expect_success 'rebase -q is quiet' ' - git checkout -b quiet topic && - git rebase -q master > output.out 2>&1 && - test ! -s output.out + git checkout -b quiet topic && + git rebase -q master >output.out 2>&1 && + test ! -s output.out ' test_expect_success 'Rebase a commit that sprinkles CRs in' ' diff --git a/t/t3402-rebase-merge.sh b/t/t3402-rebase-merge.sh index 7b7d07269a..2bea65634a 100755 --- a/t/t3402-rebase-merge.sh +++ b/t/t3402-rebase-merge.sh @@ -74,6 +74,15 @@ test_expect_success 'rebase the other way' ' git rebase --merge side ' +test_expect_success 'rebase -Xtheirs' ' + git checkout -b conflicting master~2 && + echo "AB $T" >> original && + git commit -mconflicting original && + git rebase -Xtheirs master && + grep AB original && + ! grep 11 original +' + test_expect_success 'merge and rebase should match' ' git diff-tree -r test-rebase test-merge >difference && if test -s difference diff --git a/t/t3404-rebase-interactive.sh b/t/t3404-rebase-interactive.sh index 47ca88fc52..af3b663aee 100755 --- a/t/t3404-rebase-interactive.sh +++ b/t/t3404-rebase-interactive.sh @@ -64,6 +64,67 @@ test_expect_success 'setup' ' done ' +# "exec" commands are run with the user shell by default, but this may +# be non-POSIX. For example, if SHELL=zsh then ">file" doesn't work +# to create a file. Unsetting SHELL avoids such non-portable behavior +# in tests. 
+SHELL= + +test_expect_success 'rebase -i with the exec command' ' + git checkout master && + ( + FAKE_LINES="1 exec_>touch-one + 2 exec_>touch-two exec_false exec_>touch-three + 3 4 exec_>\"touch-file__name_with_spaces\";_>touch-after-semicolon 5" && + export FAKE_LINES && + test_must_fail git rebase -i A + ) && + test_path_is_file touch-one && + test_path_is_file touch-two && + test_path_is_missing touch-three " (should have stopped before)" && + test $(git rev-parse C) = $(git rev-parse HEAD) || { + echo "Stopped at wrong revision:" + echo "($(git describe --tags HEAD) instead of C)" + false + } && + git rebase --continue && + test_path_is_file touch-three && + test_path_is_file "touch-file name with spaces" && + test_path_is_file touch-after-semicolon && + test $(git rev-parse master) = $(git rev-parse HEAD) || { + echo "Stopped at wrong revision:" + echo "($(git describe --tags HEAD) instead of master)" + false + } && + rm -f touch-* +' + +test_expect_success 'rebase -i with the exec command runs from tree root' ' + git checkout master && + mkdir subdir && cd subdir && + FAKE_LINES="1 exec_>touch-subdir" \ + git rebase -i HEAD^ && + cd .. && + test_path_is_file touch-subdir && + rm -fr subdir +' + +test_expect_success 'rebase -i with the exec command checks tree cleanness' ' + git checkout master && + ( + FAKE_LINES="exec_echo_foo_>file1 1" && + export FAKE_LINES && + test_must_fail git rebase -i HEAD^ + ) && + test $(git rev-parse master^) = $(git rev-parse HEAD) || { + echo "Stopped at wrong revision:" + echo "($(git describe --tags HEAD) instead of master^)" + false + } && + git reset --hard && + git rebase --continue +' + test_expect_success 'no changes are a nop' ' git checkout branch2 && git rebase -i F && @@ -143,16 +204,17 @@ test_expect_success 'abort' ' git rebase --abort && test $(git rev-parse new-branch1) = $(git rev-parse HEAD) && test "$(git symbolic-ref -q HEAD)" = "refs/heads/branch1" && - ! 
test -d .git/rebase-merge + test_path_is_missing .git/rebase-merge ' test_expect_success 'abort with error when new base cannot be checked out' ' git rm --cached file1 && git commit -m "remove file in base" && test_must_fail git rebase -i master > output 2>&1 && - grep "Untracked working tree file .file1. would be overwritten" \ + grep "The following untracked working tree files would be overwritten by checkout:" \ output && - ! test -d .git/rebase-merge && + grep "file1" output && + test_path_is_missing .git/rebase-merge && git reset --hard HEAD^ ' @@ -637,13 +699,19 @@ test_expect_success 'set up commits with funny messages' ' git commit -a -m "end with slash\\" && echo >>file1 && test_tick && + git commit -a -m "something (\000) that looks like octal" && + echo >>file1 && + test_tick && + git commit -a -m "something (\n) that looks like a newline" && + echo >>file1 && + test_tick && git commit -a -m "another commit" ' test_expect_success 'rebase-i history with funny messages' ' git rev-list A..funny >expect && test_tick && - FAKE_LINES="1 2" git rebase -i A && + FAKE_LINES="1 2 3 4" git rebase -i A && git rev-list A.. >actual && test_cmp expect actual ' diff --git a/t/t3407-rebase-abort.sh b/t/t3407-rebase-abort.sh index 2999e78937..fbb3f2e0df 100755 --- a/t/t3407-rebase-abort.sh +++ b/t/t3407-rebase-abort.sh @@ -38,7 +38,7 @@ testrebase() { # Clean up the state from the previous one git reset --hard pre-rebase && test_must_fail git rebase$type master && - test -d "$dotest" && + test_path_is_dir "$dotest" && git rebase --abort && test $(git rev-parse to-rebase) = $(git rev-parse pre-rebase) && test ! 
-d "$dotest" @@ -49,7 +49,7 @@ testrebase() { # Clean up the state from the previous one git reset --hard pre-rebase && test_must_fail git rebase$type master && - test -d "$dotest" && + test_path_is_dir "$dotest" && test_must_fail git rebase --skip && test $(git rev-parse HEAD) = $(git rev-parse master) && git rebase --abort && @@ -62,7 +62,7 @@ testrebase() { # Clean up the state from the previous one git reset --hard pre-rebase && test_must_fail git rebase$type master && - test -d "$dotest" && + test_path_is_dir "$dotest" && echo c > a && echo d >> a && git add a && diff --git a/t/t3410-rebase-preserve-dropped-merges.sh b/t/t3410-rebase-preserve-dropped-merges.sh index c49143a1a4..6f73b95558 100755 --- a/t/t3410-rebase-preserve-dropped-merges.sh +++ b/t/t3410-rebase-preserve-dropped-merges.sh @@ -43,11 +43,11 @@ test_expect_success 'setup' ' # G2 = same changes as G test_expect_success 'skip same-resolution merges with -p' ' git checkout H && - ! git merge E && + test_must_fail git merge E && test_commit L file1 23 && git checkout I && test_commit G2 file1 3 && - ! git merge E && + test_must_fail git merge E && test_commit J file1 23 && test_commit K file7 file7 && git rebase -i -p L && @@ -65,11 +65,11 @@ test_expect_success 'skip same-resolution merges with -p' ' # G2 = different changes as G test_expect_success 'keep different-resolution merges with -p' ' git checkout H && - ! git merge E && + test_must_fail git merge E && test_commit L2 file1 23 && git checkout I && test_commit G3 file1 4 && - ! 
git merge E && + test_must_fail git merge E && test_commit J2 file1 24 && test_commit K2 file7 file7 && test_must_fail git rebase -i -p L2 && diff --git a/t/t3415-rebase-autosquash.sh b/t/t3415-rebase-autosquash.sh index b63f4e2d67..37cb89ab53 100755 --- a/t/t3415-rebase-autosquash.sh +++ b/t/t3415-rebase-autosquash.sh @@ -21,38 +21,62 @@ test_expect_success setup ' git tag base ' -test_expect_success 'auto fixup' ' +test_auto_fixup() { git reset --hard base && echo 1 >file1 && git add -u && test_tick && git commit -m "fixup! first" - git tag final-fixup && + git tag $1 && test_tick && - git rebase --autosquash -i HEAD^^^ && + git rebase $2 -i HEAD^^^ && git log --oneline >actual && test 3 = $(wc -l <actual) && - git diff --exit-code final-fixup && + git diff --exit-code $1 && test 1 = "$(git cat-file blob HEAD^:file1)" && test 1 = $(git cat-file commit HEAD^ | grep first | wc -l) +} + +test_expect_success 'auto fixup (option)' ' + test_auto_fixup final-fixup-option --autosquash +' + +test_expect_success 'auto fixup (config)' ' + git config rebase.autosquash true && + test_auto_fixup final-fixup-config-true && + test_must_fail test_auto_fixup fixup-config-true-no --no-autosquash && + git config rebase.autosquash false && + test_must_fail test_auto_fixup final-fixup-config-false ' -test_expect_success 'auto squash' ' +test_auto_squash() { git reset --hard base && echo 1 >file1 && git add -u && test_tick && git commit -m "squash! 
first" - git tag final-squash && + git tag $1 && test_tick && - git rebase --autosquash -i HEAD^^^ && + git rebase $2 -i HEAD^^^ && git log --oneline >actual && test 3 = $(wc -l <actual) && - git diff --exit-code final-squash && + git diff --exit-code $1 && test 1 = "$(git cat-file blob HEAD^:file1)" && test 2 = $(git cat-file commit HEAD^ | grep first | wc -l) +} + +test_expect_success 'auto squash (option)' ' + test_auto_squash final-squash --autosquash +' + +test_expect_success 'auto squash (config)' ' + git config rebase.autosquash true && + test_auto_squash final-squash-config-true && + test_must_fail test_auto_squash squash-config-true-no --no-autosquash && + git config rebase.autosquash false && + test_must_fail test_auto_squash final-squash-config-false ' test_expect_success 'misspelled auto squash' ' diff --git a/t/t3418-rebase-continue.sh b/t/t3418-rebase-continue.sh new file mode 100755 index 0000000000..3b0d27350e --- /dev/null +++ b/t/t3418-rebase-continue.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +test_description='git rebase --continue tests' + +. ./test-lib.sh + +. 
"$TEST_DIRECTORY"/lib-rebase.sh + +set_fake_editor + +test_expect_success 'setup' ' + test_commit "commit-new-file-F1" F1 1 && + test_commit "commit-new-file-F2" F2 2 && + + git checkout -b topic HEAD^ && + test_commit "commit-new-file-F2-on-topic-branch" F2 22 && + + git checkout master +' + +test_expect_success 'interactive rebase --continue works with touched file' ' + rm -fr .git/rebase-* && + git reset --hard && + git checkout master && + + FAKE_LINES="edit 1" git rebase -i HEAD^ && + test-chmtime =-60 F1 && + git rebase --continue +' + +test_expect_success 'non-interactive rebase --continue works with touched file' ' + rm -fr .git/rebase-* && + git reset --hard && + git checkout master && + + test_must_fail git rebase --onto master master topic && + echo "Resolved" >F2 && + git add F2 && + test-chmtime =-60 F1 && + git rebase --continue +' + +test_done diff --git a/t/t3505-cherry-pick-empty.sh b/t/t3505-cherry-pick-empty.sh index e51e505a9f..c10b28cf57 100755 --- a/t/t3505-cherry-pick-empty.sh +++ b/t/t3505-cherry-pick-empty.sh @@ -13,12 +13,30 @@ test_expect_success setup ' git checkout -b empty-branch && test_tick && - git commit --allow-empty -m "empty" + git commit --allow-empty -m "empty" && + + echo third >> file1 && + git add file1 && + test_tick && + git commit --allow-empty-message -m "" ' test_expect_success 'cherry-pick an empty commit' ' git checkout master && { + git cherry-pick empty-branch^ + test "$?" = 1 + } +' + +test_expect_success 'index lockfile was removed' ' + + test ! -f .git/index.lock + +' + +test_expect_success 'cherry-pick a commit with an empty message' ' + git checkout master && { git cherry-pick empty-branch test "$?" 
= 1 } diff --git a/t/t3507-cherry-pick-conflict.sh b/t/t3507-cherry-pick-conflict.sh index e25cf8039a..607bf25d8f 100755 --- a/t/t3507-cherry-pick-conflict.sh +++ b/t/t3507-cherry-pick-conflict.sh @@ -38,6 +38,26 @@ test_expect_success 'failed cherry-pick does not advance HEAD' ' test "$head" = "$newhead" ' +test_expect_success 'advice from failed cherry-pick' " + git checkout -f initial^0 && + git read-tree -u --reset HEAD && + git clean -d -f -f -q -x && + + git update-index --refresh && + git diff-index --exit-code HEAD && + + picked=\$(git rev-parse --short picked) && + cat <<-EOF >expected && + error: could not apply \$picked... picked + hint: after resolving the conflicts, mark the corrected paths + hint: with 'git add <paths>' or 'git rm <paths>' + hint: and commit the result with 'git commit -c \$picked' + EOF + test_must_fail git cherry-pick picked 2>actual && + + test_cmp expected actual +" + test_expect_success 'failed cherry-pick produces dirty index' ' git checkout -f initial^0 && diff --git a/t/t3508-cherry-pick-many-commits.sh b/t/t3508-cherry-pick-many-commits.sh index f90ed3da3e..8e09fd0319 100755 --- a/t/t3508-cherry-pick-many-commits.sh +++ b/t/t3508-cherry-pick-many-commits.sh @@ -4,6 +4,18 @@ test_description='test cherry-picking many commits' . 
./test-lib.sh +check_head_differs_from() { + head=$(git rev-parse --verify HEAD) && + arg=$(git rev-parse --verify "$1") && + test "$head" != "$arg" +} + +check_head_equals() { + head=$(git rev-parse --verify HEAD) && + arg=$(git rev-parse --verify "$1") && + test "$head" = "$arg" +} + test_expect_success setup ' echo first > file1 && git add file1 && @@ -23,13 +35,55 @@ test_expect_success setup ' ' test_expect_success 'cherry-pick first..fourth works' ' + cat <<-\EOF >expected && + [master OBJID] second + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + [master OBJID] third + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + [master OBJID] fourth + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + EOF + + git checkout -f master && + git reset --hard first && + test_tick && + git cherry-pick first..fourth >actual && + git diff --quiet other && + git diff --quiet HEAD other && + + sed -e "s/$_x05[0-9a-f][0-9a-f]/OBJID/" <actual >actual.fuzzy && + test_cmp expected actual.fuzzy && + check_head_differs_from fourth +' + +test_expect_success 'cherry-pick --strategy resolve first..fourth works' ' + cat <<-\EOF >expected && + Trying simple merge. + [master OBJID] second + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + Trying simple merge. + [master OBJID] third + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + Trying simple merge. 
+ [master OBJID] fourth + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + EOF + git checkout -f master && git reset --hard first && test_tick && - git cherry-pick first..fourth && + git cherry-pick --strategy resolve first..fourth >actual && git diff --quiet other && git diff --quiet HEAD other && - test "$(git rev-parse --verify HEAD)" != "$(git rev-parse --verify fourth)" + sed -e "s/$_x05[0-9a-f][0-9a-f]/OBJID/" <actual >actual.fuzzy && + test_cmp expected actual.fuzzy && + check_head_differs_from fourth ' test_expect_success 'cherry-pick --ff first..fourth works' ' @@ -39,7 +93,7 @@ test_expect_success 'cherry-pick --ff first..fourth works' ' git cherry-pick --ff first..fourth && git diff --quiet other && git diff --quiet HEAD other && - test "$(git rev-parse --verify HEAD)" = "$(git rev-parse --verify fourth)" + check_head_equals fourth ' test_expect_success 'cherry-pick -n first..fourth works' ' @@ -89,7 +143,7 @@ test_expect_success 'cherry-pick -3 fourth works' ' git cherry-pick -3 fourth && git diff --quiet other && git diff --quiet HEAD other && - test "$(git rev-parse --verify HEAD)" != "$(git rev-parse --verify fourth)" + check_head_differs_from fourth ' test_expect_success 'cherry-pick --stdin works' ' @@ -99,7 +153,7 @@ test_expect_success 'cherry-pick --stdin works' ' git rev-list --reverse first..fourth | git cherry-pick --stdin && git diff --quiet other && git diff --quiet HEAD other && - test "$(git rev-parse --verify HEAD)" != "$(git rev-parse --verify fourth)" + check_head_differs_from fourth ' test_done diff --git a/t/t3509-cherry-pick-merge-df.sh b/t/t3509-cherry-pick-merge-df.sh new file mode 100755 index 0000000000..a5ccdbf8fc --- /dev/null +++ b/t/t3509-cherry-pick-merge-df.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +test_description='Test cherry-pick with directory/file conflicts' +. 
./test-lib.sh + +test_expect_success SYMLINKS 'Setup rename across paths each below D/F conflicts' ' + mkdir a && + >a/f && + git add a && + git commit -m a && + + mkdir b && + ln -s ../a b/a && + git add b && + git commit -m b && + + git checkout -b branch && + rm b/a && + mv a b/a && + ln -s b/a a && + git add . && + git commit -m swap && + + >f1 && + git add f1 && + git commit -m f1 +' + +test_expect_success SYMLINKS 'Cherry-pick succeeds with rename across D/F conflicts' ' + git reset --hard && + git checkout master^0 && + git cherry-pick branch +' + +test_done diff --git a/t/t3700-add.sh b/t/t3700-add.sh index d03495dc7a..7d7140db38 100755 --- a/t/t3700-add.sh +++ b/t/t3700-add.sh @@ -272,17 +272,20 @@ test_expect_success 'git add --dry-run of non-existing file' " echo \"fatal: pathspec 'ignored-file' did not match any files\" | test_cmp - actual " -cat >expect <<EOF +cat >expect.err <<\EOF The following paths are ignored by one of your .gitignore files: ignored-file Use -f if you really want to add them. 
fatal: no files added +EOF +cat >expect.out <<\EOF add 'track-this' EOF test_expect_success 'git add --dry-run --ignore-missing of non-existing file' ' - test_must_fail git add --dry-run --ignore-missing track-this ignored-file >actual 2>&1 && - test_cmp expect actual + test_must_fail git add --dry-run --ignore-missing track-this ignored-file >actual.out 2>actual.err && + test_cmp expect.out actual.out && + test_cmp expect.err actual.err ' test_done diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh index dae6358516..19857f4326 100755 --- a/t/t4013-diff-various.sh +++ b/t/t4013-diff-various.sh @@ -208,6 +208,7 @@ log -p --first-parent master log -m -p --first-parent master log -m -p master log -SF master +log -S F master log -SF -p master log --decorate --all log --decorate=full --all @@ -282,4 +283,8 @@ diff master master^ side diff --dirstat master~1 master~2 EOF +test_expect_success 'log -S requires an argument' ' + test_must_fail git log -S +' + test_done diff --git a/t/t4013/diff.log_-S_F_master b/t/t4013/diff.log_-S_F_master new file mode 100644 index 0000000000..978d2b4118 --- /dev/null +++ b/t/t4013/diff.log_-S_F_master @@ -0,0 +1,7 @@ +$ git log -S F master +commit 9a6d4949b6b76956d9d5e26f2791ec2ceff5fdc0 +Author: A U Thor <author@example.com> +Date: Mon Jun 26 00:02:00 2006 +0000 + + Third +$ diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 5b10e976a3..61de8a2718 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -32,7 +32,7 @@ EOF sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java -builtin_patterns="bibtex cpp html java objc pascal php python ruby tex" +builtin_patterns="bibtex cpp csharp html java objc pascal php python ruby tex" for p in $builtin_patterns do test_expect_success "builtin $p pattern compiles" ' diff --git a/t/t4027-diff-submodule.sh b/t/t4027-diff-submodule.sh index 1bd8e5ee3a..d99814ac64 100755 --- a/t/t4027-diff-submodule.sh +++ b/t/t4027-diff-submodule.sh @@ -114,6 +114,69 @@ 
test_expect_success 'git diff HEAD with dirty submodule (work tree, refs match)' ! test -s actual4 ' +test_expect_success 'git diff HEAD with dirty submodule (work tree, refs match) [.git/config]' ' + git config diff.ignoreSubmodules all && + git diff HEAD >actual && + ! test -s actual && + git config submodule.subname.ignore none && + git config submodule.subname.path sub && + git diff HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config submodule.subname.ignore all && + git diff HEAD >actual2 && + ! test -s actual2 && + git config submodule.subname.ignore untracked && + git diff HEAD >actual3 && + sed -e "1,/^@@/d" actual3 >actual3.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual3.body && + git config submodule.subname.ignore dirty && + git diff HEAD >actual4 && + ! test -s actual4 && + git diff HEAD --ignore-submodules=none >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config --remove-section submodule.subname && + git config --unset diff.ignoreSubmodules +' + +test_expect_success 'git diff HEAD with dirty submodule (work tree, refs match) [.gitmodules]' ' + git config diff.ignoreSubmodules dirty && + git diff HEAD >actual && + ! test -s actual && + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sub && + git diff HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config -f .gitmodules submodule.subname.ignore all && + git config -f .gitmodules submodule.subname.path sub && + git diff HEAD >actual2 && + ! 
test -s actual2 && + git config -f .gitmodules submodule.subname.ignore untracked && + git diff HEAD >actual3 && + sed -e "1,/^@@/d" actual3 >actual3.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual3.body && + git config -f .gitmodules submodule.subname.ignore dirty && + git diff HEAD >actual4 && + ! test -s actual4 && + git config submodule.subname.ignore none && + git config submodule.subname.path sub && + git diff HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config --remove-section submodule.subname && + git config --remove-section -f .gitmodules submodule.subname && + git config --unset diff.ignoreSubmodules && + rm .gitmodules +' + test_expect_success 'git diff HEAD with dirty submodule (index, refs match)' ' ( cd sub && @@ -146,6 +209,103 @@ test_expect_success 'git diff HEAD with dirty submodule (untracked, refs match)' ! test -s actual4 ' +test_expect_success 'git diff HEAD with dirty submodule (untracked, refs match) [.git/config]' ' + git config submodule.subname.ignore all && + git config submodule.subname.path sub && + git diff HEAD >actual2 && + ! test -s actual2 && + git config submodule.subname.ignore untracked && + git diff HEAD >actual3 && + ! test -s actual3 && + git config submodule.subname.ignore dirty && + git diff HEAD >actual4 && + ! test -s actual4 && + git diff --ignore-submodules=none HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config --remove-section submodule.subname +' + +test_expect_success 'git diff HEAD with dirty submodule (untracked, refs match) [.gitmodules]' ' + git config --add -f .gitmodules submodule.subname.ignore all && + git config --add -f .gitmodules submodule.subname.path sub && + git diff HEAD >actual2 && + ! 
test -s actual2 && + git config -f .gitmodules submodule.subname.ignore untracked && + git diff HEAD >actual3 && + ! test -s actual3 && + git config -f .gitmodules submodule.subname.ignore dirty && + git diff HEAD >actual4 && + ! test -s actual4 && + git config submodule.subname.ignore none && + git config submodule.subname.path sub && + git diff HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config --remove-section submodule.subname && + git config --remove-section -f .gitmodules submodule.subname && + rm .gitmodules +' + +test_expect_success 'git diff between submodule commits' ' + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git diff --ignore-submodules=dirty HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git diff --ignore-submodules HEAD^..HEAD >actual && + ! test -s actual +' + +test_expect_success 'git diff between submodule commits [.git/config]' ' + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git config submodule.subname.ignore dirty && + git config submodule.subname.path sub && + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git config submodule.subname.ignore all && + git diff HEAD^..HEAD >actual && + ! 
test -s actual && + git diff --ignore-submodules=dirty HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + git config --remove-section submodule.subname +' + +test_expect_success 'git diff between submodule commits [.gitmodules]' ' + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git config --add -f .gitmodules submodule.subname.ignore dirty && + git config --add -f .gitmodules submodule.subname.path sub && + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git config -f .gitmodules submodule.subname.ignore all && + git diff HEAD^..HEAD >actual && + ! test -s actual && + git config submodule.subname.ignore dirty && + git config submodule.subname.path sub && + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + git config --remove-section submodule.subname && + git config --remove-section -f .gitmodules submodule.subname && + rm .gitmodules +' + test_expect_success 'git diff (empty submodule dir)' ' : >empty && rm -rf sub/* sub/.git && diff --git a/t/t4045-diff-relative.sh b/t/t4045-diff-relative.sh new file mode 100755 index 0000000000..8a3c63b9e2 --- /dev/null +++ b/t/t4045-diff-relative.sh @@ -0,0 +1,61 @@ +#!/bin/sh + +test_description='diff --relative tests' +. ./test-lib.sh + +test_expect_success 'setup' ' + git commit --allow-empty -m empty && + echo content >file1 && + mkdir subdir && + echo other content >subdir/file2 && + git add . 
&& + git commit -m one +' + +check_diff() { +expect=$1; shift +cat >expected <<EOF +diff --git a/$expect b/$expect +new file mode 100644 +index 0000000..25c05ef +--- /dev/null ++++ b/$expect +@@ -0,0 +1 @@ ++other content +EOF +test_expect_success "-p $*" " + git diff -p $* HEAD^ >actual && + test_cmp expected actual +" +} + +check_stat() { +expect=$1; shift +cat >expected <<EOF + $expect | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) +EOF +test_expect_success "--stat $*" " + git diff --stat $* HEAD^ >actual && + test_cmp expected actual +" +} + +check_raw() { +expect=$1; shift +cat >expected <<EOF +:000000 100644 0000000000000000000000000000000000000000 25c05ef3639d2d270e7fe765a67668f098092bc5 A $expect +EOF +test_expect_success "--raw $*" " + git diff --no-abbrev --raw $* HEAD^ >actual && + test_cmp expected actual +" +} + +for type in diff stat raw; do + check_$type file2 --relative=subdir/ + check_$type file2 --relative=subdir + check_$type dir/file2 --relative=sub +done + +test_done diff --git a/t/t4111-apply-subdir.sh b/t/t4111-apply-subdir.sh new file mode 100755 index 0000000000..a52d94ae21 --- /dev/null +++ b/t/t4111-apply-subdir.sh @@ -0,0 +1,142 @@ +#!/bin/sh + +test_description='patching from inconvenient places' + +. 
./test-lib.sh + +test_expect_success 'setup' ' + cat >patch <<-\EOF && + diff file.orig file + --- a/file.orig + +++ b/file + @@ -1 +1,2 @@ + 1 + +2 + EOF + patch="$(pwd)/patch" && + + echo 1 >preimage && + printf "%s\n" 1 2 >postimage && + echo 3 >other && + + test_tick && + git commit --allow-empty -m basis +' + +test_expect_success 'setup: subdir' ' + reset_subdir() { + git reset && + mkdir -p sub/dir/b && + mkdir -p objects && + cp "$1" file && + cp "$1" objects/file && + cp "$1" sub/dir/file && + cp "$1" sub/dir/b/file && + git add file sub/dir/file sub/dir/b/file objects/file && + cp "$2" file && + cp "$2" sub/dir/file && + cp "$2" sub/dir/b/file && + cp "$2" objects/file && + test_might_fail git update-index --refresh -q + } +' + +test_expect_success 'apply from subdir of toplevel' ' + cp postimage expected && + reset_subdir other preimage && + ( + cd sub/dir && + git apply "$patch" + ) && + test_cmp expected sub/dir/file +' + +test_expect_success 'apply --cached from subdir of toplevel' ' + cp postimage expected && + cp other expected.working && + reset_subdir preimage other && + ( + cd sub/dir && + git apply --cached "$patch" + ) && + git show :sub/dir/file >actual && + test_cmp expected actual && + test_cmp expected.working sub/dir/file +' + +test_expect_success 'apply --index from subdir of toplevel' ' + cp postimage expected && + reset_subdir preimage other && + ( + cd sub/dir && + test_must_fail git apply --index "$patch" + ) && + reset_subdir other preimage && + ( + cd sub/dir && + test_must_fail git apply --index "$patch" + ) && + reset_subdir preimage preimage && + ( + cd sub/dir && + git apply --index "$patch" + ) && + git show :sub/dir/file >actual && + test_cmp expected actual && + test_cmp expected sub/dir/file +' + +test_expect_success 'apply from .git dir' ' + cp postimage expected && + cp preimage .git/file && + cp preimage .git/objects/file + ( + cd .git && + git apply "$patch" + ) && + test_cmp expected .git/file +' + +test_expect_success 
'apply from subdir of .git dir' ' + cp postimage expected && + cp preimage .git/file && + cp preimage .git/objects/file + ( + cd .git/objects && + git apply "$patch" + ) && + test_cmp expected .git/objects/file +' + +test_expect_success 'apply --cached from .git dir' ' + cp postimage expected && + cp other expected.working && + cp other .git/file && + reset_subdir preimage other && + ( + cd .git && + git apply --cached "$patch" + ) && + git show :file >actual && + test_cmp expected actual && + test_cmp expected.working file && + test_cmp expected.working .git/file +' + +test_expect_success 'apply --cached from subdir of .git dir' ' + cp postimage expected && + cp preimage expected.subdir && + cp other .git/file && + cp other .git/objects/file && + reset_subdir preimage other && + ( + cd .git/objects && + git apply --cached "$patch" + ) && + git show :file >actual && + git show :objects/file >actual.subdir && + test_cmp expected actual && + test_cmp expected.subdir actual.subdir +' + +test_done diff --git a/t/t4120-apply-popt.sh b/t/t4120-apply-popt.sh index b463b4f05c..2b2d00b334 100755 --- a/t/t4120-apply-popt.sh +++ b/t/t4120-apply-popt.sh @@ -10,21 +10,50 @@ test_description='git apply -p handling.' 
test_expect_success setup ' mkdir sub && echo A >sub/file1 && - cp sub/file1 file1 && + cp sub/file1 file1.saved && git add sub/file1 && echo B >sub/file1 && git diff >patch.file && - rm sub/file1 && - rmdir sub + git checkout -- sub/file1 && + git mv sub süb && + echo B >süb/file1 && + git diff >patch.escaped && + grep "[\]" patch.escaped && + rm süb/file1 && + rmdir süb ' test_expect_success 'apply git diff with -p2' ' + cp file1.saved file1 && git apply -p2 patch.file ' test_expect_success 'apply with too large -p' ' + cp file1.saved file1 && test_must_fail git apply --stat -p3 patch.file 2>err && grep "removing 3 leading" err ' +test_expect_success 'apply (-p2) traditional diff with funny filenames' ' + cat >patch.quotes <<-\EOF && + diff -u "a/"sub/file1 "b/"sub/file1 + --- "a/"sub/file1 + +++ "b/"sub/file1 + @@ -1 +1 @@ + -A + +B + EOF + echo B >expected && + + cp file1.saved file1 && + git apply -p2 patch.quotes && + test_cmp expected file1 +' + +test_expect_success 'apply with too large -p and fancy filename' ' + cp file1.saved file1 && + test_must_fail git apply --stat -p3 patch.escaped 2>err && + grep "removing 3 leading" err +' + test_done diff --git a/t/t4135-apply-weird-filenames.sh b/t/t4135-apply-weird-filenames.sh new file mode 100755 index 0000000000..1e5aad57ab --- /dev/null +++ b/t/t4135-apply-weird-filenames.sh @@ -0,0 +1,75 @@ +#!/bin/sh + +test_description='git apply with weird postimage filenames' + +. 
./test-lib.sh + +test_expect_success 'setup' ' + vector=$TEST_DIRECTORY/t4135 && + + test_tick && + git commit --allow-empty -m preimage && + git tag preimage && + + reset_preimage() { + git checkout -f preimage^0 && + git read-tree -u --reset HEAD && + git update-index --refresh + } && + + test_when_finished "rm -f \"tab embedded.txt\"" && + test_when_finished "rm -f '\''\"quoteembedded\".txt'\''" && + if touch -- "tab embedded.txt" '\''"quoteembedded".txt'\'' + then + test_set_prereq FUNNYNAMES + fi +' + +try_filename() { + desc=$1 + postimage=$2 + prereq=${3:-} + exp1=${4:-success} + exp2=${5:-success} + exp3=${6:-success} + + test_expect_$exp1 $prereq "$desc, git-style file creation patch" " + echo postimage >expected && + reset_preimage && + rm -f '$postimage' && + git apply -v \"\$vector\"/'git-$desc.diff' && + test_cmp expected '$postimage' + " + + test_expect_$exp2 $prereq "$desc, traditional patch" " + echo postimage >expected && + reset_preimage && + echo preimage >'$postimage' && + git apply -v \"\$vector\"/'diff-$desc.diff' && + test_cmp expected '$postimage' + " + + test_expect_$exp3 $prereq "$desc, traditional file creation patch" " + echo postimage >expected && + reset_preimage && + rm -f '$postimage' && + git apply -v \"\$vector\"/'add-$desc.diff' && + test_cmp expected '$postimage' + " +} + +try_filename 'plain' 'postimage.txt' +try_filename 'with spaces' 'post image.txt' +try_filename 'with tab' 'post image.txt' FUNNYNAMES +try_filename 'with backslash' 'post\image.txt' BSLASHPSPEC +try_filename 'with quote' '"postimage".txt' FUNNYNAMES success failure success + +test_expect_success 'whitespace-damaged traditional patch' ' + echo postimage >expected && + reset_preimage && + rm -f postimage.txt && + git apply -v "$vector/damaged.diff" && + test_cmp expected postimage.txt +' + +test_done diff --git a/t/t4135/.gitignore b/t/t4135/.gitignore new file mode 100644 index 0000000000..3e58e65f57 --- /dev/null +++ b/t/t4135/.gitignore @@ -0,0 +1,3 @@ 
+/file-creation/ +/trad-creation/ +/trad-modification/ diff --git a/t/t4135/add-plain.diff b/t/t4135/add-plain.diff new file mode 100644 index 0000000000..cf5970a089 --- /dev/null +++ b/t/t4135/add-plain.diff @@ -0,0 +1,5 @@ +diff -pruN a/postimage.txt b/postimage.txt +--- a/postimage.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/postimage.txt 2010-08-18 20:13:31.484002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/add-with backslash.diff b/t/t4135/add-with backslash.diff new file mode 100644 index 0000000000..c6861e1966 --- /dev/null +++ b/t/t4135/add-with backslash.diff @@ -0,0 +1,5 @@ +diff -pruN a/post\image.txt b/post\image.txt +--- a/post\image.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/post\image.txt 2010-08-18 20:13:31.692002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/add-with quote.diff b/t/t4135/add-with quote.diff new file mode 100644 index 0000000000..866de78ca1 --- /dev/null +++ b/t/t4135/add-with quote.diff @@ -0,0 +1,5 @@ +diff -pruN a/"postimage".txt b/"postimage".txt +--- a/"postimage".txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/"postimage".txt 2010-08-18 20:13:31.756002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/add-with spaces.diff b/t/t4135/add-with spaces.diff new file mode 100644 index 0000000000..a9a1212a21 --- /dev/null +++ b/t/t4135/add-with spaces.diff @@ -0,0 +1,5 @@ +diff -pruN a/post image.txt b/post image.txt +--- a/post image.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/post image.txt 2010-08-18 20:13:31.556002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/add-with tab.diff b/t/t4135/add-with tab.diff new file mode 100644 index 0000000000..bb67cb7930 --- /dev/null +++ b/t/t4135/add-with tab.diff @@ -0,0 +1,5 @@ +diff -pruN a/post image.txt b/post image.txt +--- a/post image.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/post image.txt 2010-08-18 20:13:31.628002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/damaged.diff b/t/t4135/damaged.diff new file mode 100644 
index 0000000000..68f7ededf9 --- /dev/null +++ b/t/t4135/damaged.diff @@ -0,0 +1,5 @@ +diff -pruN a/postimage.txt b/postimage.txt +--- a/postimage.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/postimage.txt 2010-08-18 20:13:31.484002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/diff-plain.diff b/t/t4135/diff-plain.diff new file mode 100644 index 0000000000..acedcfa612 --- /dev/null +++ b/t/t4135/diff-plain.diff @@ -0,0 +1,5 @@ +--- postimage.txt.orig 2010-08-18 20:13:31.432002255 -0500 ++++ postimage.txt 2010-08-18 20:13:31.432002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git a/t/t4135/diff-with backslash.diff b/t/t4135/diff-with backslash.diff new file mode 100644 index 0000000000..9068a61bd9 --- /dev/null +++ b/t/t4135/diff-with backslash.diff @@ -0,0 +1,5 @@ +--- post\image.txt.orig 2010-08-18 20:13:31.680002255 -0500 ++++ post\image.txt 2010-08-18 20:13:31.680002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git a/t/t4135/diff-with quote.diff b/t/t4135/diff-with quote.diff new file mode 100644 index 0000000000..c8e8cc1a8d --- /dev/null +++ b/t/t4135/diff-with quote.diff @@ -0,0 +1,5 @@ +--- "postimage".txt.orig 2010-08-18 20:13:31.744002255 -0500 ++++ "postimage".txt 2010-08-18 20:13:31.744002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git a/t/t4135/diff-with spaces.diff b/t/t4135/diff-with spaces.diff new file mode 100644 index 0000000000..3512056f21 --- /dev/null +++ b/t/t4135/diff-with spaces.diff @@ -0,0 +1,5 @@ +--- post image.txt.orig 2010-08-18 20:13:31.544002255 -0500 ++++ post image.txt 2010-08-18 20:13:31.544002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git a/t/t4135/diff-with tab.diff b/t/t4135/diff-with tab.diff new file mode 100644 index 0000000000..4e6d9b2941 --- /dev/null +++ b/t/t4135/diff-with tab.diff @@ -0,0 +1,5 @@ +--- post image.txt.orig 2010-08-18 20:13:31.616002255 -0500 ++++ post image.txt 2010-08-18 20:13:31.616002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git 
a/t/t4135/git-plain.diff b/t/t4135/git-plain.diff new file mode 100644 index 0000000000..db47d1a693 --- /dev/null +++ b/t/t4135/git-plain.diff @@ -0,0 +1,7 @@ +diff --git a/postimage.txt b/postimage.txt +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ b/postimage.txt +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/git-with backslash.diff b/t/t4135/git-with backslash.diff new file mode 100644 index 0000000000..0e84a10e93 --- /dev/null +++ b/t/t4135/git-with backslash.diff @@ -0,0 +1,7 @@ +diff --git "a/post\\image.txt" "b/post\\image.txt" +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ "b/post\\image.txt" +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/git-with quote.diff b/t/t4135/git-with quote.diff new file mode 100644 index 0000000000..bdbea8af35 --- /dev/null +++ b/t/t4135/git-with quote.diff @@ -0,0 +1,7 @@ +diff --git "a/\"postimage\".txt" "b/\"postimage\".txt" +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ "b/\"postimage\".txt" +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/git-with spaces.diff b/t/t4135/git-with spaces.diff new file mode 100644 index 0000000000..baaa810de0 --- /dev/null +++ b/t/t4135/git-with spaces.diff @@ -0,0 +1,7 @@ +diff --git a/post image.txt b/post image.txt +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ b/post image.txt +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/git-with tab.diff b/t/t4135/git-with tab.diff new file mode 100644 index 0000000000..cca3c9287b --- /dev/null +++ b/t/t4135/git-with tab.diff @@ -0,0 +1,7 @@ +diff --git "a/post\timage.txt" "b/post\timage.txt" +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ "b/post\timage.txt" +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/make-patches b/t/t4135/make-patches new file mode 100755 index 0000000000..f5f45ddd09 --- /dev/null +++ b/t/t4135/make-patches @@ -0,0 +1,45 @@ +#!/bin/sh + +do_filename() { + desc=$1 + postimage=$2 + + rm -fr file-creation && + git init file-creation && + ( + 
cd file-creation && + git commit --allow-empty -m init && + echo postimage >"$postimage" && + git add -N "$postimage" && + git diff HEAD >"../git-$desc.diff" + ) && + + rm -fr trad-modification && + mkdir trad-modification && + ( + cd trad-modification && + echo preimage >"$postimage.orig" && + echo postimage >"$postimage" && + ! diff -u "$postimage.orig" "$postimage" >"../diff-$desc.diff" + ) && + + rm -fr trad-creation && + mkdir trad-creation && + ( + cd trad-creation && + mkdir a b && + echo postimage >"b/$postimage" && + ! diff -pruN a b >"../add-$desc.diff" + ) +} + +do_filename plain postimage.txt && +do_filename 'with spaces' 'post image.txt' && +do_filename 'with tab' 'post image.txt' && +do_filename 'with backslash' 'post\image.txt' && +do_filename 'with quote' '"postimage".txt' && +expand add-plain.diff >damaged.diff || +{ + echo >&2 Failed. && + exit 1 +} diff --git a/t/t4150-am.sh b/t/t4150-am.sh index 810b04b817..1c3d8ed548 100755 --- a/t/t4150-am.sh +++ b/t/t4150-am.sh @@ -4,66 +4,71 @@ test_description='git am running' . ./test-lib.sh -cat >msg <<EOF -second - -Lorem ipsum dolor sit amet, consectetuer sadipscing elitr, sed diam nonumy -eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam -voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita -kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem -ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod -tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At -vero eos et accusam et justo duo dolores et ea rebum. - - Duis autem vel eum iriure dolor in hendrerit in vulputate velit - esse molestie consequat, vel illum dolore eu feugiat nulla facilisis - at vero eros et accumsan et iusto odio dignissim qui blandit - praesent luptatum zzril delenit augue duis dolore te feugait nulla - facilisi. 
- - -Lorem ipsum dolor sit amet, -consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut -laoreet dolore magna aliquam erat volutpat. - - git - --- - +++ - -Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit -lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure -dolor in hendrerit in vulputate velit esse molestie consequat, vel illum -dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio -dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te -feugait nulla facilisi. -EOF - -cat >failmail <<EOF -From foo@example.com Fri May 23 10:43:49 2008 -From: foo@example.com -To: bar@example.com -Subject: Re: [RFC/PATCH] git-foo.sh -Date: Fri, 23 May 2008 05:23:42 +0200 - -Sometimes we have to find out that there's nothing left. - -EOF - -cat >pine <<EOF -From MAILER-DAEMON Fri May 23 10:43:49 2008 -Date: 23 May 2008 05:23:42 +0200 -From: Mail System Internal Data <MAILER-DAEMON@example.com> -Subject: DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA -Message-ID: <foo-0001@example.com> - -This text is part of the internal format of your mail folder, and is not -a real message. It is created automatically by the mail system software. -If deleted, important folder data will be lost, and it will be re-created -with the data reset to initial values. - -EOF - -echo "Signed-off-by: $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" >expected +test_expect_success 'setup: messages' ' + cat >msg <<-\EOF && + second + + Lorem ipsum dolor sit amet, consectetuer sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam + voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita + kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem + ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod + tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. 
At + vero eos et accusam et justo duo dolores et ea rebum. + + EOF + q_to_tab <<-\EOF >>msg && + QDuis autem vel eum iriure dolor in hendrerit in vulputate velit + Qesse molestie consequat, vel illum dolore eu feugiat nulla facilisis + Qat vero eros et accumsan et iusto odio dignissim qui blandit + Qpraesent luptatum zzril delenit augue duis dolore te feugait nulla + Qfacilisi. + EOF + cat >>msg <<-\EOF && + + Lorem ipsum dolor sit amet, + consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut + laoreet dolore magna aliquam erat volutpat. + + git + --- + +++ + + Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit + lobortis nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure + dolor in hendrerit in vulputate velit esse molestie consequat, vel illum + dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio + dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te + feugait nulla facilisi. + EOF + + cat >failmail <<-\EOF && + From foo@example.com Fri May 23 10:43:49 2008 + From: foo@example.com + To: bar@example.com + Subject: Re: [RFC/PATCH] git-foo.sh + Date: Fri, 23 May 2008 05:23:42 +0200 + + Sometimes we have to find out that there'\''s nothing left. + + EOF + + cat >pine <<-\EOF && + From MAILER-DAEMON Fri May 23 10:43:49 2008 + Date: 23 May 2008 05:23:42 +0200 + From: Mail System Internal Data <MAILER-DAEMON@example.com> + Subject: DON'\''T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA + Message-ID: <foo-0001@example.com> + + This text is part of the internal format of your mail folder, and is not + a real message. It is created automatically by the mail system software. + If deleted, important folder data will be lost, and it will be re-created + with the data reset to initial values. 
+ + EOF + + signoff="Signed-off-by: $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" +' test_expect_success setup ' echo hello >file && @@ -71,11 +76,13 @@ test_expect_success setup ' test_tick && git commit -m first && git tag first && + echo world >>file && git add file && test_tick && git commit -s -F msg && git tag second && + git format-patch --stdout first >patch1 && { echo "X-Fake-Field: Line One" && @@ -89,74 +96,101 @@ test_expect_success setup ' echo "X-Fake-Field: Line Three" && git format-patch --stdout first | sed -e "1d" } | append_cr >patch1-crlf.eml && + sed -n -e "3,\$p" msg >file && git add file && test_tick && git commit -m third && + git format-patch --stdout first >patch2 && + git checkout -b lorem && sed -n -e "11,\$p" msg >file && head -n 9 msg >>file && test_tick && git commit -a -m "moved stuff" && + echo goodbye >another && git add another && test_tick && git commit -m "added another file" && - git format-patch --stdout master >lorem-move.patch -' -# reset time -unset test_tick -test_tick + git format-patch --stdout master >lorem-move.patch && + + git checkout -b rename && + git mv file renamed && + git commit -m "renamed a file" && + + git format-patch -M --stdout lorem >rename.patch && + + git reset --soft lorem^ && + git commit -m "renamed a file and added another" && + + git format-patch -M --stdout lorem^ >rename-add.patch && + + # reset time + unset test_tick && + test_tick +' test_expect_success 'am applies patch correctly' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && test_tick && git am <patch1 && ! test -d .git/rebase-apply && - test -z "$(git diff second)" && + git diff --exit-code second && test "$(git rev-parse second)" = "$(git rev-parse HEAD)" && test "$(git rev-parse second^)" = "$(git rev-parse HEAD^)" ' test_expect_success 'am applies patch e-mail not in a mbox' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && git am patch1.eml && ! 
test -d .git/rebase-apply && - test -z "$(git diff second)" && + git diff --exit-code second && test "$(git rev-parse second)" = "$(git rev-parse HEAD)" && test "$(git rev-parse second^)" = "$(git rev-parse HEAD^)" ' test_expect_success 'am applies patch e-mail not in a mbox with CRLF' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && git am patch1-crlf.eml && ! test -d .git/rebase-apply && - test -z "$(git diff second)" && + git diff --exit-code second && test "$(git rev-parse second)" = "$(git rev-parse HEAD)" && test "$(git rev-parse second^)" = "$(git rev-parse HEAD^)" ' -GIT_AUTHOR_NAME="Another Thor" -GIT_AUTHOR_EMAIL="a.thor@example.com" -GIT_COMMITTER_NAME="Co M Miter" -GIT_COMMITTER_EMAIL="c.miter@example.com" -export GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL GIT_COMMITTER_NAME GIT_COMMITTER_EMAIL +test_expect_success 'setup: new author and committer' ' + GIT_AUTHOR_NAME="Another Thor" && + GIT_AUTHOR_EMAIL="a.thor@example.com" && + GIT_COMMITTER_NAME="Co M Miter" && + GIT_COMMITTER_EMAIL="c.miter@example.com" && + export GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL GIT_COMMITTER_NAME GIT_COMMITTER_EMAIL +' compare () { - test "$(git cat-file commit "$2" | grep "^$1 ")" = \ - "$(git cat-file commit "$3" | grep "^$1 ")" + a=$(git cat-file commit "$2" | grep "^$1 ") && + b=$(git cat-file commit "$3" | grep "^$1 ") && + test "$a" = "$b" } test_expect_success 'am changes committer and keeps author' ' test_tick && + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && git am patch2 && ! 
test -d .git/rebase-apply && test "$(git rev-parse master^^)" = "$(git rev-parse HEAD^^)" && - test -z "$(git diff master..HEAD)" && - test -z "$(git diff master^..HEAD^)" && + git diff --exit-code master..HEAD && + git diff --exit-code master^..HEAD^ && compare author master HEAD && compare author master^ HEAD^ && test "$GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" = \ @@ -164,41 +198,55 @@ test_expect_success 'am changes committer and keeps author' ' ' test_expect_success 'am --signoff adds Signed-off-by: line' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout -b master2 first && git am --signoff <patch2 && + printf "%s\n" "$signoff" >expected && echo "Signed-off-by: $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" >>expected && git cat-file commit HEAD^ | grep "Signed-off-by:" >actual && - test_cmp actual expected && + test_cmp expected actual && echo "Signed-off-by: $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" >expected && git cat-file commit HEAD | grep "Signed-off-by:" >actual && - test_cmp actual expected + test_cmp expected actual ' test_expect_success 'am stays in branch' ' - test "refs/heads/master2" = "$(git symbolic-ref HEAD)" + echo refs/heads/master2 >expected && + git symbolic-ref HEAD >actual && + test_cmp expected actual ' test_expect_success 'am --signoff does not add Signed-off-by: line if already there' ' git format-patch --stdout HEAD^ >patch3 && sed -e "/^Subject/ s,\[PATCH,Re: Re: Re: & 1/5 v2," patch3 >patch4 + rm -fr .git/rebase-apply && + git reset --hard && git checkout HEAD^ && git am --signoff patch4 && - test "$(git cat-file commit HEAD | grep -c "^Signed-off-by:")" -eq 1 + git cat-file commit HEAD >actual && + test $(grep -c "^Signed-off-by:" actual) -eq 1 ' test_expect_success 'am without --keep removes Re: and [PATCH] stuff' ' - test "$(git rev-parse HEAD)" = "$(git rev-parse master2)" + git rev-parse HEAD >expected && + git rev-parse master2 >actual && + test_cmp expected actual ' test_expect_success 'am --keep really 
keeps the subject' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout HEAD^ && git am --keep patch4 && ! test -d .git/rebase-apply && - git cat-file commit HEAD | - fgrep "Re: Re: Re: [PATCH 1/5 v2] third" + git cat-file commit HEAD >actual && + grep "Re: Re: Re: \[PATCH 1/5 v2\] third" actual ' test_expect_success 'am -3 falls back to 3-way merge' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout -b lorem2 master2 && sed -n -e "3,\$p" msg >file && head -n 9 msg >>file && @@ -207,34 +255,75 @@ test_expect_success 'am -3 falls back to 3-way merge' ' git commit -m "copied stuff" && git am -3 lorem-move.patch && ! test -d .git/rebase-apply && - test -z "$(git diff lorem)" + git diff --exit-code lorem +' + +test_expect_success 'am can rename a file' ' + grep "^rename from" rename.patch && + rm -fr .git/rebase-apply && + git reset --hard && + git checkout lorem^0 && + git am rename.patch && + ! test -d .git/rebase-apply && + git update-index --refresh && + git diff --exit-code rename +' + +test_expect_success 'am -3 can rename a file' ' + grep "^rename from" rename.patch && + rm -fr .git/rebase-apply && + git reset --hard && + git checkout lorem^0 && + git am -3 rename.patch && + ! test -d .git/rebase-apply && + git update-index --refresh && + git diff --exit-code rename +' + +test_expect_success 'am -3 can rename a file after falling back to 3-way merge' ' + grep "^rename from" rename-add.patch && + rm -fr .git/rebase-apply && + git reset --hard && + git checkout lorem^0 && + git am -3 rename-add.patch && + ! 
test -d .git/rebase-apply && + git update-index --refresh && + git diff --exit-code rename ' test_expect_success 'am -3 -q is quiet' ' + rm -fr .git/rebase-apply && + git checkout -f lorem2 && git reset master2 --hard && sed -n -e "3,\$p" msg >file && head -n 9 msg >>file && git add file && test_tick && git commit -m "copied stuff" && - git am -3 -q lorem-move.patch > output.out 2>&1 && + git am -3 -q lorem-move.patch >output.out 2>&1 && ! test -s output.out ' test_expect_success 'am pauses on conflict' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout lorem2^^ && test_must_fail git am lorem-move.patch && test -d .git/rebase-apply ' test_expect_success 'am --skip works' ' + echo goodbye >expected && git am --skip && ! test -d .git/rebase-apply && - test -z "$(git diff lorem2^^ -- file)" && - test goodbye = "$(cat another)" + git diff --exit-code lorem2^^ -- file && + test_cmp expected another ' test_expect_success 'am --resolved works' ' + echo goodbye >expected && + rm -fr .git/rebase-apply && + git reset --hard && git checkout lorem2^^ && test_must_fail git am lorem-move.patch && test -d .git/rebase-apply && @@ -242,22 +331,29 @@ test_expect_success 'am --resolved works' ' git add file && git am --resolved && ! test -d .git/rebase-apply && - test goodbye = "$(cat another)" + test_cmp expected another ' test_expect_success 'am takes patches from a Pine mailbox' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && cat pine patch1 | git am && ! test -d .git/rebase-apply && - test -z "$(git diff master^..HEAD)" + git diff --exit-code master^..HEAD ' test_expect_success 'am fails on mail without patch' ' + rm -fr .git/rebase-apply && + git reset --hard && test_must_fail git am <failmail && - rm -r .git/rebase-apply/ + git am --abort && + ! 
test -d .git/rebase-apply ' test_expect_success 'am fails on empty patch' ' + rm -fr .git/rebase-apply && + git reset --hard && echo "---" >>failmail && test_must_fail git am <failmail && git am --skip && @@ -266,28 +362,34 @@ test_expect_success 'am fails on empty patch' ' test_expect_success 'am works from stdin in subdirectory' ' rm -fr subdir && + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && ( mkdir -p subdir && cd subdir && git am <../patch1 ) && - test -z "$(git diff second)" + git diff --exit-code second ' test_expect_success 'am works from file (relative path given) in subdirectory' ' rm -fr subdir && + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && ( mkdir -p subdir && cd subdir && git am ../patch1 ) && - test -z "$(git diff second)" + git diff --exit-code second ' test_expect_success 'am works from file (absolute path given) in subdirectory' ' rm -fr subdir && + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && P=$(pwd) && ( @@ -295,27 +397,31 @@ test_expect_success 'am works from file (absolute path given) in subdirectory' ' cd subdir && git am "$P/patch1" ) && - test -z "$(git diff second)" + git diff --exit-code second ' test_expect_success 'am --committer-date-is-author-date' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && test_tick && git am --committer-date-is-author-date patch1 && git cat-file commit HEAD | sed -e "/^\$/q" >head1 && - at=$(sed -ne "/^author /s/.*> //p" head1) && - ct=$(sed -ne "/^committer /s/.*> //p" head1) && - test "$at" = "$ct" + sed -ne "/^author /s/.*> //p" head1 >at && + sed -ne "/^committer /s/.*> //p" head1 >ct && + test_cmp at ct ' test_expect_success 'am without --committer-date-is-author-date' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && test_tick && git am patch1 && git cat-file commit HEAD | sed -e "/^\$/q" >head1 && - at=$(sed -ne "/^author /s/.*> //p" head1) && - ct=$(sed -ne 
"/^committer /s/.*> //p" head1) && - test "$at" != "$ct" + sed -ne "/^author /s/.*> //p" head1 >at && + sed -ne "/^committer /s/.*> //p" head1 >ct && + ! test_cmp at ct ' # This checks for +0000 because TZ is set to UTC and that should @@ -323,41 +429,51 @@ test_expect_success 'am without --committer-date-is-author-date' ' # by test_tick that uses -0700 timezone; if this feature does not # work, we will see that instead of +0000. test_expect_success 'am --ignore-date' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && test_tick && git am --ignore-date patch1 && git cat-file commit HEAD | sed -e "/^\$/q" >head1 && - at=$(sed -ne "/^author /s/.*> //p" head1) && - echo "$at" | grep "+0000" + sed -ne "/^author /s/.*> //p" head1 >at && + grep "+0000" at ' test_expect_success 'am into an unborn branch' ' + git rev-parse first^{tree} >expected && + rm -fr .git/rebase-apply && + git reset --hard && rm -fr subdir && - mkdir -p subdir && + mkdir subdir && git format-patch --numbered-files -o subdir -1 first && ( cd subdir && git init && git am 1 ) && - result=$( - cd subdir && git rev-parse HEAD^{tree} + ( + cd subdir && + git rev-parse HEAD^{tree} >../actual ) && - test "z$result" = "z$(git rev-parse first^{tree})" + test_cmp expected actual ' test_expect_success 'am newline in subject' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && test_tick && - sed -e "s/second/second \\\n foo/" patch1 > patchnl && - git am < patchnl > output.out 2>&1 && + sed -e "s/second/second \\\n foo/" patch1 >patchnl && + git am <patchnl >output.out 2>&1 && grep "^Applying: second \\\n foo$" output.out ' test_expect_success 'am -q is quiet' ' + rm -fr .git/rebase-apply && + git reset --hard && git checkout first && test_tick && - git am -q < patch1 > output.out 2>&1 && + git am -q <patch1 >output.out 2>&1 && ! 
test -s output.out ' diff --git a/t/t4200-rerere.sh b/t/t4200-rerere.sh index 70856d07ed..36255d608a 100755 --- a/t/t4200-rerere.sh +++ b/t/t4200-rerere.sh @@ -4,237 +4,391 @@ # test_description='git rerere + +! [fifth] version1 + ! [first] first + ! [fourth] version1 + ! [master] initial + ! [second] prefer first over second + ! [third] version2 +------ + + [third] version2 ++ [fifth] version1 + + [fourth] version1 ++ + + [third^] third + - [second] prefer first over second + + + [first] first + + [second^] second +++++++ [master] initial ' . ./test-lib.sh -test_expect_success 'setup' " - cat > a1 <<- EOF && +test_expect_success 'setup' ' + cat >a1 <<-\EOF && Some title ========== - Whether 'tis nobler in the mind to suffer + Whether '\''tis nobler in the mind to suffer The slings and arrows of outrageous fortune, Or to take arms against a sea of troubles, And by opposing end them? To die: to sleep; No more; and by a sleep to say we end The heart-ache and the thousand natural shocks - That flesh is heir to, 'tis a consummation - Devoutly to be wish'd. + That flesh is heir to, '\''tis a consummation + Devoutly to be wish'\''d. EOF git add a1 && + test_tick && git commit -q -a -m initial && - git checkout -b first && - cat >> a1 <<- EOF && + cat >>a1 <<-\EOF && Some title ========== To die, to sleep; - To sleep: perchance to dream: ay, there's the rub; + To sleep: perchance to dream: ay, there'\''s the rub; For in that sleep of death what dreams may come When we have shuffled off this mortal coil, - Must give us pause: there's the respect + Must give us pause: there'\''s the respect That makes calamity of so long life; EOF + + git checkout -b first && + test_tick && git commit -q -a -m first && git checkout -b second master && git show first:a1 | - sed -e 's/To die, t/To die! T/' -e 's/Some title/Some Title/' > a1 && - echo '* END *' >>a1 && + sed -e "s/To die, t/To die! 
T/" -e "s/Some title/Some Title/" >a1 && + echo "* END *" >>a1 && + test_tick && git commit -q -a -m second -" +' test_expect_success 'nothing recorded without rerere' ' - (rm -rf .git/rr-cache; git config rerere.enabled false) && + rm -rf .git/rr-cache && + git config rerere.enabled false && test_must_fail git merge first && ! test -d .git/rr-cache ' -# activate rerere, old style -test_expect_success 'conflicting merge' ' +test_expect_success 'activate rerere, old style (conflicting merge)' ' git reset --hard && mkdir .git/rr-cache && - git config --unset rerere.enabled && - test_must_fail git merge first -' + test_might_fail git config --unset rerere.enabled && + test_must_fail git merge first && -sha1=$(perl -pe 's/ .*//' .git/MERGE_RR) -rr=.git/rr-cache/$sha1 -test_expect_success 'recorded preimage' "grep ^=======$ $rr/preimage" + sha1=$(perl -pe "s/ .*//" .git/MERGE_RR) && + rr=.git/rr-cache/$sha1 && + grep "^=======\$" $rr/preimage && + ! test -f $rr/postimage && + ! test -f $rr/thisimage +' test_expect_success 'rerere.enabled works, too' ' rm -rf .git/rr-cache && git config rerere.enabled true && git reset --hard && test_must_fail git merge first && + + sha1=$(perl -pe "s/ .*//" .git/MERGE_RR) && + rr=.git/rr-cache/$sha1 && grep ^=======$ $rr/preimage ' -test_expect_success 'no postimage or thisimage yet' \ - "test ! -f $rr/postimage -a ! -f $rr/thisimage" +test_expect_success 'set up rr-cache' ' + rm -rf .git/rr-cache && + git config rerere.enabled true && + git reset --hard && + test_must_fail git merge first && + sha1=$(perl -pe "s/ .*//" .git/MERGE_RR) && + rr=.git/rr-cache/$sha1 +' -test_expect_success 'preimage has right number of lines' ' +test_expect_success 'rr-cache looks sane' ' + # no postimage or thisimage yet + ! test -f $rr/postimage && + ! 
test -f $rr/thisimage && + # preimage has right number of lines cnt=$(sed -ne "/^<<<<<<</,/^>>>>>>>/p" $rr/preimage | wc -l) && + echo $cnt && test $cnt = 13 - ' -git show first:a1 > a1 - -cat > expect << EOF ---- a/a1 -+++ b/a1 -@@ -1,4 +1,4 @@ --Some Title -+Some title - ========== - Whether 'tis nobler in the mind to suffer - The slings and arrows of outrageous fortune, -@@ -8,21 +8,11 @@ - The heart-ache and the thousand natural shocks - That flesh is heir to, 'tis a consummation - Devoutly to be wish'd. --<<<<<<< --Some Title --========== --To die! To sleep; --======= - Some title - ========== - To die, to sleep; -->>>>>>> - To sleep: perchance to dream: ay, there's the rub; - For in that sleep of death what dreams may come - When we have shuffled off this mortal coil, - Must give us pause: there's the respect - That makes calamity of so long life; --<<<<<<< --======= --* END * -->>>>>>> -EOF -git rerere diff > out - -test_expect_success 'rerere diff' 'test_cmp expect out' - -cat > expect << EOF -a1 -EOF - -git rerere status > out - -test_expect_success 'rerere status' 'test_cmp expect out' - -test_expect_success 'commit succeeds' \ - "git commit -q -a -m 'prefer first over second'" - -test_expect_success 'recorded postimage' "test -f $rr/postimage" - -test_expect_success 'another conflicting merge' ' - git checkout -b third master && - git show second^:a1 | sed "s/To die: t/To die! T/" > a1 && - git commit -q -a -m third && - test_must_fail git pull . first +test_expect_success 'rerere diff' ' + git show first:a1 >a1 && + cat >expect <<-\EOF && + --- a/a1 + +++ b/a1 + @@ -1,4 +1,4 @@ + -Some Title + +Some title + ========== + Whether '\''tis nobler in the mind to suffer + The slings and arrows of outrageous fortune, + @@ -8,21 +8,11 @@ + The heart-ache and the thousand natural shocks + That flesh is heir to, '\''tis a consummation + Devoutly to be wish'\''d. + -<<<<<<< + -Some Title + -========== + -To die! 
To sleep; + -======= + Some title + ========== + To die, to sleep; + ->>>>>>> + To sleep: perchance to dream: ay, there'\''s the rub; + For in that sleep of death what dreams may come + When we have shuffled off this mortal coil, + Must give us pause: there'\''s the respect + That makes calamity of so long life; + -<<<<<<< + -======= + -* END * + ->>>>>>> + EOF + git rerere diff >out && + test_cmp expect out ' -git show first:a1 | sed 's/To die: t/To die! T/' > expect -test_expect_success 'rerere kicked in' "! grep ^=======$ a1" +test_expect_success 'rerere status' ' + echo a1 >expect && + git rerere status >out && + test_cmp expect out +' -test_expect_success 'rerere prefers first change' 'test_cmp a1 expect' +test_expect_success 'first postimage wins' ' + git show first:a1 | sed "s/To die: t/To die! T/" >expect && -rm $rr/postimage -echo "$sha1 a1" | perl -pe 'y/\012/\000/' > .git/MERGE_RR + git commit -q -a -m "prefer first over second" && + test -f $rr/postimage && -test_expect_success 'rerere clear' 'git rerere clear' + oldmtimepost=$(test-chmtime -v -60 $rr/postimage | cut -f 1) && -test_expect_success 'clear removed the directory' "test ! -d $rr" + git checkout -b third master && + git show second^:a1 | sed "s/To die: t/To die! T/" >a1 && + git commit -q -a -m third && -mkdir $rr -echo Hello > $rr/preimage -echo World > $rr/postimage + test_must_fail git pull . first && + # rerere kicked in + ! 
grep "^=======\$" a1 && + test_cmp expect a1 +' -sha2=4000000000000000000000000000000000000000 -rr2=.git/rr-cache/$sha2 -mkdir $rr2 -echo Hello > $rr2/preimage +test_expect_success 'rerere updates postimage timestamp' ' + newmtimepost=$(test-chmtime -v +0 $rr/postimage | cut -f 1) && + test $oldmtimepost -lt $newmtimepost +' -almost_15_days_ago=$((60-15*86400)) -just_over_15_days_ago=$((-1-15*86400)) -almost_60_days_ago=$((60-60*86400)) -just_over_60_days_ago=$((-1-60*86400)) +test_expect_success 'rerere clear' ' + rm $rr/postimage && + echo "$sha1 a1" | perl -pe "y/\012/\000/" >.git/MERGE_RR && + git rerere clear && + ! test -d $rr +' -test-chmtime =$almost_60_days_ago $rr/preimage -test-chmtime =$almost_15_days_ago $rr2/preimage +test_expect_success 'set up for garbage collection tests' ' + mkdir -p $rr && + echo Hello >$rr/preimage && + echo World >$rr/postimage && -test_expect_success 'garbage collection (part1)' 'git rerere gc' + sha2=4000000000000000000000000000000000000000 && + rr2=.git/rr-cache/$sha2 && + mkdir $rr2 && + echo Hello >$rr2/preimage && -test_expect_success 'young records still live' \ - "test -f $rr/preimage && test -f $rr2/preimage" + almost_15_days_ago=$((60-15*86400)) && + just_over_15_days_ago=$((-1-15*86400)) && + almost_60_days_ago=$((60-60*86400)) && + just_over_60_days_ago=$((-1-60*86400)) && -test-chmtime =$just_over_60_days_ago $rr/preimage -test-chmtime =$just_over_15_days_ago $rr2/preimage + test-chmtime =$just_over_60_days_ago $rr/preimage && + test-chmtime =$almost_60_days_ago $rr/postimage && + test-chmtime =$almost_15_days_ago $rr2/preimage +' -test_expect_success 'garbage collection (part2)' 'git rerere gc' +test_expect_success 'gc preserves young or recently used records' ' + git rerere gc && + test -f $rr/preimage && + test -f $rr2/preimage +' -test_expect_success 'old records rest in peace' \ - "test ! -f $rr/preimage && test ! 
-f $rr2/preimage" +test_expect_success 'old records rest in peace' ' + test-chmtime =$just_over_60_days_ago $rr/postimage && + test-chmtime =$just_over_15_days_ago $rr2/preimage && + git rerere gc && + ! test -f $rr/preimage && + ! test -f $rr2/preimage +' -test_expect_success 'file2 added differently in two branches' ' +test_expect_success 'setup: file2 added differently in two branches' ' git reset --hard && + git checkout -b fourth && - echo Hallo > file2 && + echo Hallo >file2 && git add file2 && + test_tick && git commit -m version1 && + git checkout third && - echo Bello > file2 && + echo Bello >file2 && git add file2 && + test_tick && git commit -m version2 && + test_must_fail git merge fourth && - echo Cello > file2 && + echo Cello >file2 && git add file2 && git commit -m resolution ' test_expect_success 'resolution was recorded properly' ' + echo Cello >expected && + git reset --hard HEAD~2 && git checkout -b fifth && - echo Hallo > file3 && + + echo Hallo >file3 && git add file3 && + test_tick && git commit -m version1 && + git checkout third && - echo Bello > file3 && + echo Bello >file3 && git add file3 && + test_tick && git commit -m version2 && git tag version2 && + test_must_fail git merge fifth && - test Cello = "$(cat file3)" && - test 0 != $(git ls-files -u | wc -l) + test_cmp expected file3 && + test_must_fail git update-index --refresh ' test_expect_success 'rerere.autoupdate' ' - git config rerere.autoupdate true + git config rerere.autoupdate true && git reset --hard && git checkout version2 && test_must_fail git merge fifth && - test 0 = $(git ls-files -u | wc -l) + git update-index --refresh ' test_expect_success 'merge --rerere-autoupdate' ' - git config --unset rerere.autoupdate + test_might_fail git config --unset rerere.autoupdate && git reset --hard && git checkout version2 && test_must_fail git merge --rerere-autoupdate fifth && - test 0 = $(git ls-files -u | wc -l) + git update-index --refresh ' test_expect_success 'merge 
--no-rerere-autoupdate' ' - git config rerere.autoupdate true + headblob=$(git rev-parse version2:file3) && + mergeblob=$(git rev-parse fifth:file3) && + cat >expected <<-EOF && + 100644 $headblob 2 file3 + 100644 $mergeblob 3 file3 + EOF + + git config rerere.autoupdate true && git reset --hard && git checkout version2 && test_must_fail git merge --no-rerere-autoupdate fifth && - test 2 = $(git ls-files -u | wc -l) + git ls-files -u >actual && + test_cmp expected actual +' + +test_expect_success 'set up an unresolved merge' ' + headblob=$(git rev-parse version2:file3) && + mergeblob=$(git rev-parse fifth:file3) && + cat >expected.unresolved <<-EOF && + 100644 $headblob 2 file3 + 100644 $mergeblob 3 file3 + EOF + + test_might_fail git config --unset rerere.autoupdate && + git reset --hard && + git checkout version2 && + fifth=$(git rev-parse fifth) && + echo "$fifth branch 'fifth' of ." | + git fmt-merge-msg >msg && + ancestor=$(git merge-base version2 fifth) && + test_must_fail git merge-recursive "$ancestor" -- HEAD fifth && + + git ls-files --stage >failedmerge && + cp file3 file3.conflict && + + git ls-files -u >actual && + test_cmp expected.unresolved actual +' + +test_expect_success 'explicit rerere' ' + test_might_fail git config --unset rerere.autoupdate && + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + test_must_fail git update-index --refresh -q && + + git rerere && + git ls-files -u >actual && + test_cmp expected.unresolved actual +' + +test_expect_success 'explicit rerere with autoupdate' ' + git config rerere.autoupdate true && + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + test_must_fail git update-index --refresh -q && + + git rerere && + git update-index --refresh +' + +test_expect_success 'explicit rerere --rerere-autoupdate overrides' ' + git config rerere.autoupdate false && + git rm -fr --cached . 
&& + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + git rerere && + git ls-files -u >actual1 && + + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + git rerere --rerere-autoupdate && + git update-index --refresh && + + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + git rerere --rerere-autoupdate --no-rerere-autoupdate && + git ls-files -u >actual2 && + + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + git rerere --rerere-autoupdate --no-rerere-autoupdate --rerere-autoupdate && + git update-index --refresh && + + test_cmp expected.unresolved actual1 && + test_cmp expected.unresolved actual2 +' + +test_expect_success 'rerere --no-no-rerere-autoupdate' ' + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + test_must_fail git rerere --no-no-rerere-autoupdate 2>err && + grep [Uu]sage err && + test_must_fail git update-index --refresh +' + +test_expect_success 'rerere -h' ' + test_must_fail git rerere -h >help && + grep [Uu]sage help ' test_done diff --git a/t/t4202-log.sh b/t/t4202-log.sh index 2230e606ed..2e51356947 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -100,13 +100,11 @@ test_expect_success 'oneline' ' test_expect_success 'diff-filter=A' ' - actual=$(git log --pretty="format:%s" --diff-filter=A HEAD) && - expect=$(echo fifth ; echo fourth ; echo third ; echo initial) && - test "$actual" = "$expect" || { - echo Oops - echo "Actual: $actual" - false - } + git log --pretty="format:%s" --diff-filter=A HEAD > actual && + git log --pretty="format:%s" --diff-filter A HEAD > actual-separate && + printf "fifth\nfourth\nthird\ninitial" > expect && + test_cmp expect actual && + test_cmp expect actual-separate ' @@ -203,6 +201,13 @@ test_expect_success 'log --grep' ' test_cmp expect actual ' +test_expect_success 'log 
--grep option parsing' ' + echo second >expect && + git log -1 --pretty="tformat:%s" --grep sec >actual && + test_cmp expect actual && + test_must_fail git log -1 --pretty="tformat:%s" --grep +' + test_expect_success 'log -i --grep' ' echo Second >expect && git log -1 --pretty="tformat:%s" -i --grep=sec >actual && @@ -436,5 +441,17 @@ test_expect_success 'log.decorate configuration' ' ' -test_done +test_expect_success 'show added path under "--follow -M"' ' + # This tests for a regression introduced in v1.7.2-rc0~103^2~2 + test_create_repo regression && + ( + cd regression && + test_commit needs-another-commit && + test_commit foo.bar && + git log -M --follow -p foo.bar.t && + git log -M --follow --stat foo.bar.t && + git log -M --follow --name-only foo.bar.t + ) +' +test_done diff --git a/t/t5001-archive-attr.sh b/t/t5001-archive-attr.sh index 426b319bd3..02d4d2284d 100755 --- a/t/t5001-archive-attr.sh +++ b/t/t5001-archive-attr.sh @@ -4,7 +4,7 @@ test_description='git archive attribute tests' . ./test-lib.sh -SUBSTFORMAT=%H%n +SUBSTFORMAT='%H (%h)%n' test_expect_exists() { test_expect_success " $1 exists" "test -e $1" diff --git a/t/t5520-pull.sh b/t/t5520-pull.sh index 319e389ed0..0b489f5b12 100755 --- a/t/t5520-pull.sh +++ b/t/t5520-pull.sh @@ -4,6 +4,11 @@ test_description='pulling into void' . 
./test-lib.sh +modify () { + sed -e "$1" <"$2" >"$2.x" && + mv "$2.x" "$2" +} + D=`pwd` test_expect_success setup ' @@ -160,4 +165,61 @@ test_expect_success 'pull --rebase works on branch yet to be born' ' test_cmp expect actual ' +test_expect_success 'setup for detecting upstreamed changes' ' + mkdir src && + (cd src && + git init && + printf "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n" > stuff && + git add stuff && + git commit -m "Initial revision" + ) && + git clone src dst && + (cd src && + modify s/5/43/ stuff && + git commit -a -m "5->43" && + modify s/6/42/ stuff && + git commit -a -m "Make it bigger" + ) && + (cd dst && + modify s/5/43/ stuff && + git commit -a -m "Independent discovery of 5->43" + ) +' + +test_expect_success 'git pull --rebase detects upstreamed changes' ' + (cd dst && + git pull --rebase && + test -z "$(git ls-files -u)" + ) +' + +test_expect_success 'setup for avoiding reapplying old patches' ' + (cd dst && + test_might_fail git rebase --abort && + git reset --hard origin/master + ) && + git clone --bare src src-replace.git && + rm -rf src && + mv src-replace.git src && + (cd dst && + modify s/2/22/ stuff && + git commit -a -m "Change 2" && + modify s/3/33/ stuff && + git commit -a -m "Change 3" && + modify s/4/44/ stuff && + git commit -a -m "Change 4" && + git push && + + modify s/44/55/ stuff && + git commit --amend -a -m "Modified Change 4" + ) +' + +test_expect_success 'git pull --rebase does not reapply old patches' ' + (cd dst && + test_must_fail git pull --rebase && + test 1 = $(find .git/rebase-apply -name "000*" | wc -l) + ) +' + test_done diff --git a/t/t5525-fetch-tagopt.sh b/t/t5525-fetch-tagopt.sh new file mode 100755 index 0000000000..4fbf7a120f --- /dev/null +++ b/t/t5525-fetch-tagopt.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +test_description='tagopt variable affects "git fetch" and is overridden by commandline.' + +. ./test-lib.sh + +setup_clone () { + git clone --mirror . 
$1 && + git remote add remote_$1 $1 && + (cd $1 && + git tag tag_$1) +} + +test_expect_success setup ' + test_commit test && + setup_clone one && + git config remote.remote_one.tagopt --no-tags && + setup_clone two && + git config remote.remote_two.tagopt --tags + ' + +test_expect_success "fetch with tagopt=--no-tags does not get tag" ' + git fetch remote_one && + test_must_fail git show-ref tag_one + ' + +test_expect_success "fetch --tags with tagopt=--no-tags gets tag" ' + git fetch --tags remote_one && + git show-ref tag_one + ' + +test_expect_success "fetch --no-tags with tagopt=--tags does not get tag" ' + git fetch --no-tags remote_two && + test_must_fail git show-ref tag_two + ' + +test_expect_success "fetch with tagopt=--tags gets tag" ' + git fetch remote_two && + git show-ref tag_two + ' +test_done diff --git a/t/t5530-upload-pack-error.sh b/t/t5530-upload-pack-error.sh index 044603c26e..6b2a5f4a65 100755 --- a/t/t5530-upload-pack-error.sh +++ b/t/t5530-upload-pack-error.sh @@ -60,6 +60,15 @@ test_expect_success 'upload-pack fails due to error in rev-list' ' grep "bad tree object" output.err ' +test_expect_success 'upload-pack error message when bad ref requested' ' + + printf "0045want %s multi_ack_detailed\n00000009done\n0000" \ + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef" >input && + test_must_fail git upload-pack . <input >output 2>output.err && + grep -q "not our ref" output.err && + ! 
grep -q multi_ack_detailed output.err +' + test_expect_success 'upload-pack fails due to error in pack-objects enumeration' ' printf "0032want %s\n00000009done\n0000" \ diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 8abb71afcd..4431dfd02b 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -178,8 +178,14 @@ test_expect_success 'clone respects global branch.autosetuprebase' ' test_expect_success 'respect url-encoding of file://' ' git init x+y && - test_must_fail git clone "file://$PWD/x+y" xy-url && - git clone "file://$PWD/x%2By" xy-url + git clone "file://$PWD/x+y" xy-url-1 && + git clone "file://$PWD/x%2By" xy-url-2 +' + +test_expect_success 'do not query-string-decode + in URLs' ' + rm -rf x+y && + git init "x y" && + test_must_fail git clone "file://$PWD/x+y" xy-no-plus ' test_expect_success 'do not respect url-encoding of non-url path' ' diff --git a/t/t6010-merge-base.sh b/t/t6010-merge-base.sh index 0144d9e858..62197a3d35 100755 --- a/t/t6010-merge-base.sh +++ b/t/t6010-merge-base.sh @@ -3,175 +3,231 @@ # Copyright (c) 2005 Junio C Hamano # -test_description='Merge base computation. +test_description='Merge base and parent list computation. ' . ./test-lib.sh -T=$(git write-tree) - -M=1130000000 -Z=+0000 - -GIT_COMMITTER_EMAIL=git@comm.iter.xz -GIT_COMMITTER_NAME='C O Mmiter' -GIT_AUTHOR_NAME='A U Thor' -GIT_AUTHOR_EMAIL=git@au.thor.xz -export GIT_COMMITTER_EMAIL GIT_COMMITTER_NAME GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL - -doit() { - OFFSET=$1; shift - NAME=$1; shift - PARENTS= - for P - do - PARENTS="${PARENTS}-p $P " - done - GIT_COMMITTER_DATE="$(($M + $OFFSET)) $Z" - GIT_AUTHOR_DATE=$GIT_COMMITTER_DATE - export GIT_COMMITTER_DATE GIT_AUTHOR_DATE - commit=$(echo $NAME | git commit-tree $T $PARENTS) - echo $commit >.git/refs/tags/$NAME - echo $commit -} - -# E---D---C---B---A -# \'-_ \ \ -# \ `---------G \ -# \ \ -# F----------------H - -# Setup... 
-E=$(doit 5 E) -D=$(doit 4 D $E) -F=$(doit 6 F $E) -C=$(doit 3 C $D) -B=$(doit 2 B $C) -A=$(doit 1 A $B) -G=$(doit 7 G $B $E) -H=$(doit 8 H $A $F) - -test_expect_success 'compute merge-base (single)' \ - 'MB=$(git merge-base G H) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/B"' - -test_expect_success 'compute merge-base (all)' \ - 'MB=$(git merge-base --all G H) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/B"' - -test_expect_success 'compute merge-base with show-branch' \ - 'MB=$(git show-branch --merge-base G H) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/B"' - -# Setup for second test to demonstrate that relying on timestamps in a -# distributed SCM to provide a _consistent_ partial ordering of commits -# leads to insanity. -# -# Relative -# Structure timestamps -# -# PL PR +4 +4 -# / \/ \ / \/ \ -# L2 C2 R2 +3 -1 +3 -# | | | | | | -# L1 C1 R1 +2 -2 +2 -# | | | | | | -# L0 C0 R0 +1 -3 +1 -# \ | / \ | / -# S 0 -# -# The left and right chains of commits can be of any length and complexity as -# long as all of the timestamps are greater than that of S. 
+test_expect_success 'setup' ' + T=$(git write-tree) && -S=$(doit 0 S) + M=1130000000 && + Z=+0000 && -C0=$(doit -3 C0 $S) -C1=$(doit -2 C1 $C0) -C2=$(doit -1 C2 $C1) + GIT_COMMITTER_EMAIL=git@comm.iter.xz && + GIT_COMMITTER_NAME="C O Mmiter" && + GIT_AUTHOR_NAME="A U Thor" && + GIT_AUTHOR_EMAIL=git@au.thor.xz && + export GIT_COMMITTER_EMAIL GIT_COMMITTER_NAME GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL && -L0=$(doit 1 L0 $S) -L1=$(doit 2 L1 $L0) -L2=$(doit 3 L2 $L1) + doit() { + OFFSET=$1 && + NAME=$2 && + shift 2 && -R0=$(doit 1 R0 $S) -R1=$(doit 2 R1 $R0) -R2=$(doit 3 R2 $R1) + PARENTS= && + for P + do + PARENTS="${PARENTS}-p $P " + done && -PL=$(doit 4 PL $L2 $C2) -PR=$(doit 4 PR $C2 $R2) + GIT_COMMITTER_DATE="$(($M + $OFFSET)) $Z" && + GIT_AUTHOR_DATE=$GIT_COMMITTER_DATE && + export GIT_COMMITTER_DATE GIT_AUTHOR_DATE && -test_expect_success 'compute merge-base (single)' \ - 'MB=$(git merge-base PL PR) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/C2"' + commit=$(echo $NAME | git commit-tree $T $PARENTS) && -test_expect_success 'compute merge-base (all)' \ - 'MB=$(git merge-base --all PL PR) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/C2"' + echo $commit >.git/refs/tags/$NAME && + echo $commit + } +' -# Another set to demonstrate base between one commit and a merge -# in the documentation. 
-# -# * C (MMC) * B (MMB) * A (MMA) -# * o * o * o -# * o * o * o -# * o * o * o -# * o | _______/ -# | |/ -# | * 1 (MM1) -# | _______/ -# |/ -# * root (MMR) +test_expect_success 'set up G and H' ' + # E---D---C---B---A + # \"-_ \ \ + # \ `---------G \ + # \ \ + # F----------------H + E=$(doit 5 E) && + D=$(doit 4 D $E) && + F=$(doit 6 F $E) && + C=$(doit 3 C $D) && + B=$(doit 2 B $C) && + A=$(doit 1 A $B) && + G=$(doit 7 G $B $E) && + H=$(doit 8 H $A $F) +' + +test_expect_success 'merge-base G H' ' + git name-rev $B >expected && + + MB=$(git merge-base G H) && + git name-rev "$MB" >actual.single && + + MB=$(git merge-base --all G H) && + git name-rev "$MB" >actual.all && + + MB=$(git show-branch --merge-base G H) && + git name-rev "$MB" >actual.sb && + + test_cmp expected actual.single && + test_cmp expected actual.all && + test_cmp expected actual.sb +' +test_expect_success 'merge-base/show-branch --independent' ' + git name-rev "$H" >expected1 && + git name-rev "$H" "$G" >expected2 && + + parents=$(git merge-base --independent H) && + git name-rev $parents >actual1.mb && + parents=$(git merge-base --independent A H G) && + git name-rev $parents >actual2.mb && + + parents=$(git show-branch --independent H) && + git name-rev $parents >actual1.sb && + parents=$(git show-branch --independent A H G) && + git name-rev $parents >actual2.sb && + + test_cmp expected1 actual1.mb && + test_cmp expected2 actual2.mb && + test_cmp expected1 actual1.sb && + test_cmp expected2 actual2.sb +' + +test_expect_success 'unsynchronized clocks' ' + # This test is to demonstrate that relying on timestamps in a distributed + # SCM to provide a _consistent_ partial ordering of commits leads to + # insanity. 
+ # + # Relative + # Structure timestamps + # + # PL PR +4 +4 + # / \/ \ / \/ \ + # L2 C2 R2 +3 -1 +3 + # | | | | | | + # L1 C1 R1 +2 -2 +2 + # | | | | | | + # L0 C0 R0 +1 -3 +1 + # \ | / \ | / + # S 0 + # + # The left and right chains of commits can be of any length and complexity as + # long as all of the timestamps are greater than that of S. + + S=$(doit 0 S) && + + C0=$(doit -3 C0 $S) && + C1=$(doit -2 C1 $C0) && + C2=$(doit -1 C2 $C1) && + + L0=$(doit 1 L0 $S) && + L1=$(doit 2 L1 $L0) && + L2=$(doit 3 L2 $L1) && + + R0=$(doit 1 R0 $S) && + R1=$(doit 2 R1 $R0) && + R2=$(doit 3 R2 $R1) && + + PL=$(doit 4 PL $L2 $C2) && + PR=$(doit 4 PR $C2 $R2) && + + git name-rev $C2 >expected && + + MB=$(git merge-base PL PR) && + git name-rev "$MB" >actual.single && + + MB=$(git merge-base --all PL PR) && + git name-rev "$MB" >actual.all && + + test_cmp expected actual.single && + test_cmp expected actual.all +' + +test_expect_success '--independent with unsynchronized clocks' ' + IB=$(doit 0 IB) && + I1=$(doit -10 I1 $IB) && + I2=$(doit -9 I2 $I1) && + I3=$(doit -8 I3 $I2) && + I4=$(doit -7 I4 $I3) && + I5=$(doit -6 I5 $I4) && + I6=$(doit -5 I6 $I5) && + I7=$(doit -4 I7 $I6) && + I8=$(doit -3 I8 $I7) && + IH=$(doit -2 IH $I8) && + + echo $IH >expected && + git merge-base --independent IB IH >actual && + test_cmp expected actual +' test_expect_success 'merge-base for octopus-step (setup)' ' - test_tick && git commit --allow-empty -m root && git tag MMR && - test_tick && git commit --allow-empty -m 1 && git tag MM1 && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m A && git tag MMA && + # Another set to demonstrate base between one commit and a merge + # in the documentation. 
+ # + # * C (MMC) * B (MMB) * A (MMA) + # * o * o * o + # * o * o * o + # * o * o * o + # * o | _______/ + # | |/ + # | * 1 (MM1) + # | _______/ + # |/ + # * root (MMR) + + test_commit MMR && + test_commit MM1 && + test_commit MM-o && + test_commit MM-p && + test_commit MM-q && + test_commit MMA && git checkout MM1 && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m B && git tag MMB && + test_commit MM-r && + test_commit MM-s && + test_commit MM-t && + test_commit MMB && git checkout MMR && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m C && git tag MMC + test_commit MM-u && + test_commit MM-v && + test_commit MM-w && + test_commit MM-x && + test_commit MMC ' test_expect_success 'merge-base A B C' ' - MB=$(git merge-base --all MMA MMB MMC) && - MM1=$(git rev-parse --verify MM1) && - test "$MM1" = "$MB" -' + git rev-parse --verify MM1 >expected && + git rev-parse --verify MMR >expected.sb && -test_expect_success 'merge-base A B C using show-branch' ' - MB=$(git show-branch --merge-base MMA MMB MMC) && - MMR=$(git rev-parse --verify MMR) && - test "$MMR" = "$MB" + git merge-base --all MMA MMB MMC >actual && + git merge-base --all --octopus MMA MMB MMC >actual.common && + git show-branch --merge-base MMA MMB MMC >actual.sb && + + test_cmp expected actual && + test_cmp expected.sb actual.common && + test_cmp expected.sb actual.sb ' -test_expect_success 'criss-cross merge-base for octopus-step (setup)' ' +test_expect_success 'criss-cross merge-base for octopus-step' ' git reset --hard MMR && - test_tick && git commit --allow-empty -m 1 && git tag CC1 && + test_commit CC1 && git reset --hard E && - test_tick && git commit --allow-empty -m 2 && git tag 
CC2 && - test_tick && git merge -s ours CC1 && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m B && git tag CCB && + test_commit CC2 && + test_tick && + git merge -s ours CC1 && + test_commit CC-o && + test_commit CCB && git reset --hard CC1 && - test_tick && git merge -s ours CC2 && - test_tick && git commit --allow-empty -m A && git tag CCA -' + git merge -s ours CC2 && + test_commit CCA && + + git rev-parse CC1 CC2 >expected && + git merge-base --all CCB CCA^^ CCA^^2 >actual && -test_expect_success 'merge-base B A^^ A^^2' ' - MB0=$(git merge-base --all CCB CCA^^ CCA^^2 | sort) && - MB1=$(git rev-parse CC1 CC2 | sort) && - test "$MB0" = "$MB1" + sort expected >expected.sorted && + sort actual >actual.sorted && + test_cmp expected.sorted actual.sorted ' test_done diff --git a/t/t6018-rev-list-glob.sh b/t/t6018-rev-list-glob.sh index 58428d9f5c..fb8291c812 100755 --- a/t/t6018-rev-list-glob.sh +++ b/t/t6018-rev-list-glob.sh @@ -123,6 +123,12 @@ test_expect_success 'rev-list --glob=refs/heads/subspace/*' ' ' +test_expect_success 'rev-list --glob refs/heads/subspace/*' ' + + compare rev-list "subspace/one subspace/two" "--glob refs/heads/subspace/*" + +' + test_expect_success 'rev-list --glob=heads/subspace/*' ' compare rev-list "subspace/one subspace/two" "--glob=heads/subspace/*" diff --git a/t/t6020-merge-df.sh b/t/t6020-merge-df.sh index e71c687f2b..490d397114 100755 --- a/t/t6020-merge-df.sh +++ b/t/t6020-merge-df.sh @@ -22,7 +22,7 @@ git commit -m "File: dir"' test_expect_code 1 'Merge with d/f conflicts' 'git merge "merge msg" B master' -test_expect_failure 'F/D conflict' ' +test_expect_success 'F/D conflict' ' git reset --hard && git checkout master && rm .git/index && diff --git a/t/t6031-merge-recursive.sh b/t/t6031-merge-recursive.sh index 8a3304fb0b..bd75e0e643 100755 --- a/t/t6031-merge-recursive.sh +++ b/t/t6031-merge-recursive.sh @@ -57,4 +57,35 @@ test_expect_success FILEMODE 'verify executable bit on file' ' 
test -x file2 ' +test_expect_success 'merging with triple rename across D/F conflict' ' + git reset --hard HEAD && + git checkout -b main && + git rm -rf . && + + echo "just a file" >sub1 && + mkdir -p sub2 && + echo content1 >sub2/file1 && + echo content2 >sub2/file2 && + echo content3 >sub2/file3 && + mkdir simple && + echo base >simple/bar && + git add -A && + test_tick && + git commit -m base && + + git checkout -b other && + echo more >>simple/bar && + test_tick && + git commit -a -m changesimplefile && + + git checkout main && + git rm sub1 && + git mv sub2 sub1 && + test_tick && + git commit -m changefiletodir && + + test_tick && + git merge other +' + test_done diff --git a/t/t6035-merge-dir-to-symlink.sh b/t/t6035-merge-dir-to-symlink.sh index cd3190c4a6..dc09513be5 100755 --- a/t/t6035-merge-dir-to-symlink.sh +++ b/t/t6035-merge-dir-to-symlink.sh @@ -48,7 +48,7 @@ test_expect_success 'setup for merge test' ' git tag baseline ' -test_expect_success 'do not lose a/b-2/c/d in merge (resolve)' ' +test_expect_success 'Handle D/F conflict, do not lose a/b-2/c/d in merge (resolve)' ' git reset --hard && git checkout baseline^0 && git merge -s resolve master && @@ -56,7 +56,7 @@ test_expect_success 'do not lose a/b-2/c/d in merge (resolve)' ' test -f a/b-2/c/d ' -test_expect_failure 'do not lose a/b-2/c/d in merge (recursive)' ' +test_expect_success 'Handle D/F conflict, do not lose a/b-2/c/d in merge (recursive)' ' git reset --hard && git checkout baseline^0 && git merge -s recursive master && @@ -64,6 +64,54 @@ test_expect_failure 'do not lose a/b-2/c/d in merge (recursive)' ' test -f a/b-2/c/d ' +test_expect_success 'Handle F/D conflict, do not lose a/b-2/c/d in merge (resolve)' ' + git reset --hard && + git checkout master^0 && + git merge -s resolve baseline^0 && + test -h a/b && + test -f a/b-2/c/d +' + +test_expect_success 'Handle F/D conflict, do not lose a/b-2/c/d in merge (recursive)' ' + git reset --hard && + git checkout master^0 && + git merge -s 
recursive baseline^0 && + test -h a/b && + test -f a/b-2/c/d +' + +test_expect_failure 'do not lose untracked in merge (resolve)' ' + git reset --hard && + git checkout baseline^0 && + >a/b/c/e && + test_must_fail git merge -s resolve master && + test -f a/b/c/e && + test -f a/b-2/c/d +' + +test_expect_success 'do not lose untracked in merge (recursive)' ' + git reset --hard && + git checkout baseline^0 && + >a/b/c/e && + test_must_fail git merge -s recursive master && + test -f a/b/c/e && + test -f a/b-2/c/d +' + +test_expect_success 'do not lose modifications in merge (resolve)' ' + git reset --hard && + git checkout baseline^0 && + echo more content >>a/b/c/d && + test_must_fail git merge -s resolve master +' + +test_expect_success 'do not lose modifications in merge (recursive)' ' + git reset --hard && + git checkout baseline^0 && + echo more content >>a/b/c/d && + test_must_fail git merge -s recursive master +' + test_expect_success 'setup a merge where dir a/b-2 changed to symlink' ' git reset --hard && git checkout start^0 && @@ -74,7 +122,7 @@ test_expect_success 'setup a merge where dir a/b-2 changed to symlink' ' git tag test2 ' -test_expect_success 'merge should not have conflicts (resolve)' ' +test_expect_success 'merge should not have D/F conflicts (resolve)' ' git reset --hard && git checkout baseline^0 && git merge -s resolve test2 && @@ -82,7 +130,7 @@ test_expect_success 'merge should not have conflicts (resolve)' ' test -f a/b/c/d ' -test_expect_failure 'merge should not have conflicts (recursive)' ' +test_expect_success 'merge should not have D/F conflicts (recursive)' ' git reset --hard && git checkout baseline^0 && git merge -s recursive test2 && @@ -90,4 +138,12 @@ test_expect_failure 'merge should not have conflicts (recursive)' ' test -f a/b/c/d ' +test_expect_success 'merge should not have F/D conflicts (recursive)' ' + git reset --hard && + git checkout -b foo test2 && + git merge -s recursive baseline^0 && + test -h a/b-2 && + test -f 
a/b/c/d +' + test_done diff --git a/t/t6037-merge-ours-theirs.sh b/t/t6037-merge-ours-theirs.sh index 8ab3d61f44..2cf42c73f1 100755 --- a/t/t6037-merge-ours-theirs.sh +++ b/t/t6037-merge-ours-theirs.sh @@ -58,7 +58,7 @@ test_expect_success 'pull with -X' ' git reset --hard master && git pull -s recursive -X ours . side && git reset --hard master && git pull -s recursive -Xtheirs . side && git reset --hard master && git pull -s recursive -X theirs . side && - git reset --hard master && ! git pull -s recursive -X bork . side + git reset --hard master && test_must_fail git pull -s recursive -X bork . side ' test_done diff --git a/t/t6038-merge-text-auto.sh b/t/t6038-merge-text-auto.sh new file mode 100755 index 0000000000..52d0dc4bb8 --- /dev/null +++ b/t/t6038-merge-text-auto.sh @@ -0,0 +1,189 @@ +#!/bin/sh + +test_description='CRLF merge conflict across text=auto change + +* [master] remove .gitattributes + ! [side] add line from b +-- + + [side] add line from b +* [master] remove .gitattributes +* [master^] add line from a +* [master~2] normalize file +*+ [side^] Initial +' + +. 
./test-lib.sh + +test_expect_success setup ' + git config core.autocrlf false && + + echo first line | append_cr >file && + echo first line >control_file && + echo only line >inert_file && + + git add file control_file inert_file && + test_tick && + git commit -m "Initial" && + git tag initial && + git branch side && + + echo "* text=auto" >.gitattributes && + touch file && + git add .gitattributes file && + test_tick && + git commit -m "normalize file" && + + echo same line | append_cr >>file && + echo same line >>control_file && + git add file control_file && + test_tick && + git commit -m "add line from a" && + git tag a && + + git rm .gitattributes && + rm file && + git checkout file && + test_tick && + git commit -m "remove .gitattributes" && + git tag c && + + git checkout side && + echo same line | append_cr >>file && + echo same line >>control_file && + git add file control_file && + test_tick && + git commit -m "add line from b" && + git tag b && + + git checkout master +' + +test_expect_success 'set up fuzz_conflict() helper' ' + fuzz_conflict() { + sed -e "s/^\([<>=]......\) .*/\1/" "$@" + } +' + +test_expect_success 'Merge after setting text=auto' ' + cat <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . && + rm -f .gitattributes && + git reset --hard a && + git merge b && + test_cmp expected file +' + +test_expect_success 'Merge addition of text=auto' ' + cat <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . 
&& + rm -f .gitattributes && + git reset --hard b && + git merge a && + test_cmp expected file +' + +test_expect_success 'Detect CRLF/LF conflict after setting text=auto' ' + q_to_cr <<-\EOF >expected && + <<<<<<< + first line + same line + ======= + first lineQ + same lineQ + >>>>>>> + EOF + + git config merge.renormalize false && + rm -f .gitattributes && + git reset --hard a && + test_must_fail git merge b && + fuzz_conflict file >file.fuzzy && + test_cmp expected file.fuzzy +' + +test_expect_success 'Detect LF/CRLF conflict from addition of text=auto' ' + q_to_cr <<-\EOF >expected && + <<<<<<< + first lineQ + same lineQ + ======= + first line + same line + >>>>>>> + EOF + + git config merge.renormalize false && + rm -f .gitattributes && + git reset --hard b && + test_must_fail git merge a && + fuzz_conflict file >file.fuzzy && + test_cmp expected file.fuzzy +' + +test_expect_failure 'checkout -m after setting text=auto' ' + cat <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . && + rm -f .gitattributes && + git reset --hard initial && + git checkout a -- . && + git checkout -m b && + test_cmp expected file +' + +test_expect_failure 'checkout -m addition of text=auto' ' + cat <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . && + rm -f .gitattributes file && + git reset --hard initial && + git checkout b -- . && + git checkout -m a && + test_cmp expected file +' + +test_expect_failure 'cherry-pick patch from after text=auto was added' ' + append_cr <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . && + git reset --hard b && + test_must_fail git cherry-pick a >err 2>&1 && + grep "[Nn]othing added" err && + test_cmp expected file +' + +test_expect_success 'Test delete/normalize conflict' ' + git checkout -f side && + git rm -fr . 
&& + rm -f .gitattributes && + git reset --hard initial && + git rm file && + git commit -m "remove file" && + git checkout master && + git reset --hard a^ && + git merge side +' + +test_done diff --git a/t/t6050-replace.sh b/t/t6050-replace.sh index 203ffdb17a..dd917d76da 100755 --- a/t/t6050-replace.sh +++ b/t/t6050-replace.sh @@ -209,7 +209,7 @@ test_expect_success 'fetch branch with replacement' ' test_expect_success 'bisect and replacements' ' git bisect start $HASH7 $HASH1 && - test "$S" = "$(git rev-parse --verify HEAD)" && + test "$PARA3" = "$(git rev-parse --verify HEAD)" && git bisect reset && GIT_NO_REPLACE_OBJECTS=1 git bisect start $HASH7 $HASH1 && test "$HASH4" = "$(git rev-parse --verify HEAD)" && @@ -219,6 +219,12 @@ test_expect_success 'bisect and replacements' ' git bisect reset ' +test_expect_success 'index-pack and replacements' ' + git --no-replace-objects rev-list --objects HEAD | + git --no-replace-objects pack-objects test- && + git index-pack test-*.pack +' + # # test_done diff --git a/t/t6200-fmt-merge-msg.sh b/t/t6200-fmt-merge-msg.sh index 42f8ece097..71f6cad3c2 100755 --- a/t/t6200-fmt-merge-msg.sh +++ b/t/t6200-fmt-merge-msg.sh @@ -70,14 +70,13 @@ test_expect_success setup ' i=$(($i+1)) done && - git show-branch -' + git show-branch && -cat >expected <<\EOF -Merge branch 'left' -EOF + apos="'\''" +' -test_expect_success 'merge-msg test #1' ' +test_expect_success 'message for merging local branch' ' + echo "Merge branch ${apos}left${apos}" >expected && git checkout master && git fetch . 
left && @@ -86,11 +85,8 @@ test_expect_success 'merge-msg test #1' ' test_cmp expected actual ' -cat >expected <<EOF -Merge branch 'left' of $(pwd) -EOF - -test_expect_success 'merge-msg test #2' ' +test_expect_success 'message for merging external branch' ' + echo "Merge branch ${apos}left${apos} of $(pwd)" >expected && git checkout master && git fetch "$(pwd)" left && @@ -99,139 +95,140 @@ test_expect_success 'merge-msg test #2' ' test_cmp expected actual ' -cat >expected <<\EOF -Merge branch 'left' - -* left: - Left #5 - Left #4 - Left #3 - Common #2 - Common #1 -EOF +test_expect_success '[merge] summary/log configuration' ' + cat >expected <<-EOF && + Merge branch ${apos}left${apos} -test_expect_success 'merge-msg test #3-1' ' + * left: + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + EOF - git config --unset-all merge.log - git config --unset-all merge.summary git config merge.log true && + test_might_fail git config --unset-all merge.summary && git checkout master && test_tick && git fetch . left && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -test_expect_success 'merge-msg test #3-2' ' + git fmt-merge-msg <.git/FETCH_HEAD >actual1 && - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary true && git checkout master && test_tick && git fetch . 
left && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -cat >expected <<\EOF -Merge branches 'left' and 'right' + git fmt-merge-msg <.git/FETCH_HEAD >actual2 && -* left: - Left #5 - Left #4 - Left #3 - Common #2 - Common #1 + test_cmp expected actual1 && + test_cmp expected actual2 +' -* right: - Right #5 - Right #4 - Right #3 - Common #2 - Common #1 -EOF +test_expect_success 'fmt-merge-msg -m' ' + echo "Sync with left" >expected && + cat >expected.log <<-EOF && + Sync with left + + * ${apos}left${apos} of $(pwd): + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + EOF + + test_might_fail git config --unset merge.log && + test_might_fail git config --unset merge.summary && + git checkout master && + git fetch "$(pwd)" left && + git fmt-merge-msg -m "Sync with left" <.git/FETCH_HEAD >actual && + git fmt-merge-msg --log -m "Sync with left" \ + <.git/FETCH_HEAD >actual.log && + git config merge.log true && + git fmt-merge-msg -m "Sync with left" \ + <.git/FETCH_HEAD >actual.log-config && + git fmt-merge-msg --no-log -m "Sync with left" \ + <.git/FETCH_HEAD >actual.nolog && + + test_cmp expected actual && + test_cmp expected.log actual.log && + test_cmp expected.log actual.log-config && + test_cmp expected actual.nolog +' -test_expect_success 'merge-msg test #4-1' ' +test_expect_success 'setup: expected shortlog for two branches' ' + cat >expected <<-EOF + Merge branches ${apos}left${apos} and ${apos}right${apos} + + * left: + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + + * right: + Right #5 + Right #4 + Right #3 + Common #2 + Common #1 + EOF +' - git config --unset-all merge.log - git config --unset-all merge.summary +test_expect_success 'shortlog for two branches' ' git config merge.log true && - + test_might_fail git config --unset-all merge.summary && git checkout master && test_tick && git fetch . 
left right && + git fmt-merge-msg <.git/FETCH_HEAD >actual1 && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -test_expect_success 'merge-msg test #4-2' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary true && - git checkout master && test_tick && git fetch . left right && + git fmt-merge-msg <.git/FETCH_HEAD >actual2 && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -test_expect_success 'merge-msg test #5-1' ' - - git config --unset-all merge.log - git config --unset-all merge.summary git config merge.log yes && - + test_might_fail git config --unset-all merge.summary && git checkout master && test_tick && git fetch . left right && + git fmt-merge-msg <.git/FETCH_HEAD >actual3 && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -test_expect_success 'merge-msg test #5-2' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary yes && - git checkout master && test_tick && git fetch . left right && + git fmt-merge-msg <.git/FETCH_HEAD >actual4 && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual + test_cmp expected actual1 && + test_cmp expected actual2 && + test_cmp expected actual3 && + test_cmp expected actual4 ' test_expect_success 'merge-msg -F' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary yes && - git checkout master && test_tick && git fetch . 
left right && - git fmt-merge-msg -F .git/FETCH_HEAD >actual && test_cmp expected actual ' test_expect_success 'merge-msg -F in subdirectory' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary yes && - git checkout master && test_tick && git fetch . left right && @@ -245,11 +242,11 @@ test_expect_success 'merge-msg -F in subdirectory' ' ' test_expect_success 'merge-msg with nothing to merge' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary yes && + >empty && + ( cd remote && git checkout -b unrelated && @@ -258,22 +255,20 @@ test_expect_success 'merge-msg with nothing to merge' ' git fmt-merge-msg <.git/FETCH_HEAD >../actual ) && - test_cmp /dev/null actual + test_cmp empty actual ' -cat >expected <<\EOF -Merge tag 'tag-r3' - -* tag 'tag-r3': - Right #3 - Common #2 - Common #1 -EOF - test_expect_success 'merge-msg tag' ' + cat >expected <<-EOF && + Merge tag ${apos}tag-r3${apos} - git config --unset-all merge.log - git config --unset-all merge.summary + * tag ${apos}tag-r3${apos}: + Right #3 + Common #2 + Common #1 + EOF + + test_might_fail git config --unset-all merge.log && git config merge.summary yes && git checkout master && @@ -284,26 +279,24 @@ test_expect_success 'merge-msg tag' ' test_cmp expected actual ' -cat >expected <<\EOF -Merge tags 'tag-r3' and 'tag-l5' - -* tag 'tag-r3': - Right #3 - Common #2 - Common #1 - -* tag 'tag-l5': - Left #5 - Left #4 - Left #3 - Common #2 - Common #1 -EOF - test_expect_success 'merge-msg two tags' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + cat >expected <<-EOF && + Merge tags ${apos}tag-r3${apos} and ${apos}tag-l5${apos} + + * tag ${apos}tag-r3${apos}: + Right #3 + Common #2 + Common #1 + + * tag ${apos}tag-l5${apos}: + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + 
EOF + + test_might_fail git config --unset-all merge.log && git config merge.summary yes && git checkout master && @@ -314,26 +307,24 @@ test_expect_success 'merge-msg two tags' ' test_cmp expected actual ' -cat >expected <<\EOF -Merge branch 'left', tag 'tag-r3' - -* tag 'tag-r3': - Right #3 - Common #2 - Common #1 - -* left: - Left #5 - Left #4 - Left #3 - Common #2 - Common #1 -EOF - test_expect_success 'merge-msg tag and branch' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + cat >expected <<-EOF && + Merge branch ${apos}left${apos}, tag ${apos}tag-r3${apos} + + * tag ${apos}tag-r3${apos}: + Right #3 + Common #2 + Common #1 + + * left: + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + EOF + + test_might_fail git config --unset-all merge.log && git config merge.summary yes && git checkout master && @@ -344,26 +335,27 @@ test_expect_success 'merge-msg tag and branch' ' test_cmp expected actual ' -cat >expected <<\EOF -Merge branch 'long' - -* long: (35 commits) -EOF - test_expect_success 'merge-msg lots of commits' ' + { + cat <<-EOF && + Merge branch ${apos}long${apos} + + * long: (35 commits) + EOF + + i=29 && + while test $i -gt 9 + do + echo " $i" && + i=$(($i-1)) + done && + echo " ..." + } >expected && git checkout master && test_tick && git fetch . long && - i=29 && - while test $i -gt 9 - do - echo " $i" && - i=$(($i-1)) - done >>expected && - echo " ..." 
>>expected - git fmt-merge-msg <.git/FETCH_HEAD >actual && test_cmp expected actual ' diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh index 0da13a8d6b..2c55801ee8 100755 --- a/t/t7003-filter-branch.sh +++ b/t/t7003-filter-branch.sh @@ -143,11 +143,12 @@ test_expect_success 'more setup' ' test_expect_success 'use index-filter to move into a subdirectory' ' git branch directorymoved && git filter-branch -f --index-filter \ - "git ls-files -s | sed \"s-\\t-&newsubdir/-\" | + "git ls-files -s | sed \"s- -&newsubdir/-\" | GIT_INDEX_FILE=\$GIT_INDEX_FILE.new \ git update-index --index-info && mv \"\$GIT_INDEX_FILE.new\" \"\$GIT_INDEX_FILE\"" directorymoved && - test -z "$(git diff HEAD directorymoved:newsubdir)"' + git diff --exit-code HEAD directorymoved:newsubdir +' test_expect_success 'stops when msg filter fails' ' old=$(git rev-parse HEAD) && diff --git a/t/t7006-pager.sh b/t/t7006-pager.sh index 71d3ceff8f..fb744e3c4a 100755 --- a/t/t7006-pager.sh +++ b/t/t7006-pager.sh @@ -58,6 +58,21 @@ test_expect_success TTY 'some commands use a pager' ' test -e paginated.out ' +test_expect_failure TTY 'pager runs from subdir' ' + echo subdir/paginated.out >expected && + mkdir -p subdir && + rm -f paginated.out subdir/paginated.out && + ( + cd subdir && + test_terminal git log + ) && + { + ls paginated.out subdir/paginated.out || + : + } >actual && + test_cmp expected actual +' + test_expect_success TTY 'some commands do not use a pager' ' rm -f paginated.out || cleanup_fail && @@ -106,6 +121,45 @@ test_expect_success TTY 'no pager with --no-pager' ' ! test -e paginated.out ' +test_expect_success TTY 'configuration can disable pager' ' + rm -f paginated.out && + test_might_fail git config --unset pager.grep && + test_terminal git grep initial && + test -e paginated.out && + + rm -f paginated.out && + git config pager.grep false && + test_when_finished "git config --unset pager.grep" && + test_terminal git grep initial && + ! 
test -e paginated.out +' + +test_expect_success TTY 'git config uses a pager if configured to' ' + rm -f paginated.out && + git config pager.config true && + test_when_finished "git config --unset pager.config" && + test_terminal git config --list && + test -e paginated.out +' + +test_expect_success TTY 'configuration can enable pager (from subdir)' ' + rm -f paginated.out && + mkdir -p subdir && + git config pager.bundle true && + test_when_finished "git config --unset pager.bundle" && + + git bundle create test.bundle --all && + rm -f paginated.out subdir/paginated.out && + ( + cd subdir && + test_terminal git bundle unbundle ../test.bundle + ) && + { + test -e paginated.out || + test -e subdir/paginated.out + } +' + # A colored commit log will begin with an appropriate ANSI escape # for the first color; the text "commit" comes later. colorful() { @@ -369,4 +423,16 @@ test_GIT_PAGER_overrides expect_success test_must_fail 'git -p' test_doesnt_paginate expect_failure test_must_fail 'git -p nonsense' +test_pager_choices 'git shortlog' +test_expect_success 'setup: configure shortlog not to paginate' ' + git config pager.shortlog false +' +test_doesnt_paginate expect_success 'git shortlog' +test_no_local_config_subdir expect_success 'git shortlog' +test_default_pager expect_success 'git -p shortlog' +test_core_pager_subdir expect_success 'git -p shortlog' + +test_core_pager_subdir expect_success test_must_fail \ + 'git -p apply </dev/null' + test_done diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index eb8ca88cce..c0f9f3f705 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -61,7 +61,7 @@ test_expect_success 'git grep -Fi iLE a' ' # This test actually passes on platforms where regexec() supports the # flag REG_STARTEND. 
-test_expect_failure 'git grep ile a' ' +test_expect_success 'git grep ile a' ' git grep ile a ' diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 7d8ed68bef..3a43571cab 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -438,4 +438,20 @@ test_expect_success 'force removal of nested git work tree' ' ! test -d bar ' +test_expect_success 'git clean -e' ' + rm -fr repo && + mkdir repo && + ( + cd repo && + git init && + touch 1 2 3 known && + git add known && + git clean -f -e 1 -e 2 && + test -e 1 && + test -e 2 && + ! (test -e 3) && + test -e known + ) +' + test_done diff --git a/t/t7403-submodule-sync.sh b/t/t7403-submodule-sync.sh index 7538756487..02522f9627 100755 --- a/t/t7403-submodule-sync.sh +++ b/t/t7403-submodule-sync.sh @@ -14,7 +14,7 @@ test_expect_success setup ' echo file > file && git add file && test_tick && - git commit -m upstream + git commit -m upstream && git clone . super && git clone super submodule && (cd super && @@ -42,7 +42,7 @@ test_expect_success 'change submodule url' ' ) && mv submodule moved-submodule && (cd super && - git config -f .gitmodules submodule.submodule.url ../moved-submodule + git config -f .gitmodules submodule.submodule.url ../moved-submodule && test_tick && git commit -a -m moved-submodule ) @@ -58,6 +58,9 @@ test_expect_success '"git submodule sync" should update submodule URLs' ' (cd super-clone/submodule && git checkout master && git pull + ) && + (cd super-clone && + test -d "$(git config submodule.submodule.url)" ) ' diff --git a/t/t7405-submodule-merge.sh b/t/t7405-submodule-merge.sh index 4a7b8933f4..7e2e258950 100755 --- a/t/t7405-submodule-merge.sh +++ b/t/t7405-submodule-merge.sh @@ -54,21 +54,132 @@ test_expect_success setup ' git merge -s ours a ' -test_expect_success 'merging with modify/modify conflict' ' +# History setup +# +# b +# / \ +# a d +# \ / +# c +# +# a in the main repository records to sub-a in the submodule and +# analogous b and c. 
d should be automatically found by merging c into +# b in the main repository. +test_expect_success 'setup for merge search' ' + mkdir merge-search && + (cd merge-search && + git init && + mkdir sub && + (cd sub && + git init && + echo "file-a" > file-a && + git add file-a && + git commit -m "sub-a" && + git branch sub-a) && + git add sub && + git commit -m "a" && + git branch a && + + git checkout -b b && + (cd sub && + git checkout -b sub-b && + echo "file-b" > file-b && + git add file-b && + git commit -m "sub-b") && + git commit -a -m "b" && + + git checkout -b c a && + (cd sub && + git checkout -b sub-c sub-a && + echo "file-c" > file-c && + git add file-c && + git commit -m "sub-c") && + git commit -a -m "c" && - git checkout -b test1 a && - test_must_fail git merge b && - test -f .git/MERGE_MSG && - git diff && - test -n "$(git ls-files -u)" + git checkout -b d a && + (cd sub && + git checkout -b sub-d sub-b && + git merge sub-c) && + git commit -a -m "d" && + git branch test b) ' -test_expect_success 'merging with a modify/modify conflict between merge bases' ' +test_expect_success 'merge with one side as a fast-forward of the other' ' + (cd merge-search && + git checkout -b test-forward b && + git merge d && + git ls-tree test-forward sub | cut -f1 | cut -f3 -d" " > actual && + (cd sub && + git rev-parse sub-d > ../expect) && + test_cmp actual expect) +' +test_expect_success 'merging should conflict for non fast-forward' ' + (cd merge-search && + git checkout -b test-nonforward b && + (cd sub && + git rev-parse sub-d > ../expect) && + test_must_fail git merge c 2> actual && + grep $(cat expect) actual > /dev/null && + git reset --hard) +' + +test_expect_success 'merging should fail for ambiguous common parent' ' + (cd merge-search && + git checkout -b test-ambiguous b && + (cd sub && + git checkout -b ambiguous sub-b && + git merge sub-c && + git rev-parse sub-d > ../expect1 && + git rev-parse ambiguous > ../expect2) && + test_must_fail git merge c 2> 
actual && + grep $(cat expect1) actual > /dev/null && + grep $(cat expect2) actual > /dev/null && + git reset --hard) +' + +# in a situation like this +# +# submodule tree: +# +# sub-a --- sub-b --- sub-d +# +# main tree: +# +# e (sub-a) +# / +# bb (sub-b) +# \ +# f (sub-d) +# +# A merge between e and f should fail because one of the submodule +# commits (sub-a) does not descend from the submodule merge-base (sub-b). +# +test_expect_success 'merging should fail for changes that are backwards' ' + (cd merge-search && + git checkout -b bb a && + (cd sub && + git checkout sub-b) && + git commit -a -m "bb" && + + git checkout -b e bb && + (cd sub && + git checkout sub-a) && + git commit -a -m "e" && + + git checkout -b f bb && + (cd sub && + git checkout sub-d) && + git commit -a -m "f" && + + git checkout -b test-backward e && + test_must_fail git merge f) +' + +test_expect_success 'merging with a modify/modify conflict between merge bases' ' git reset --hard HEAD && git checkout -b test2 c && git merge d - ' test_done diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index 1382a8e58a..bfb4975e94 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -25,7 +25,7 @@ test_expect_success 'setup a submodule tree' ' echo file > file && git add file && test_tick && - git commit -m upstream + git commit -m upstream && git clone . super && git clone super submodule && git clone super rebasing && diff --git a/t/t7407-submodule-foreach.sh b/t/t7407-submodule-foreach.sh index db9365b645..905a8baae9 100755 --- a/t/t7407-submodule-foreach.sh +++ b/t/t7407-submodule-foreach.sh @@ -16,7 +16,7 @@ test_expect_success 'setup a submodule tree' ' echo file > file && git add file && test_tick && - git commit -m upstream + git commit -m upstream && git clone . 
super && git clone super submodule && ( @@ -30,7 +30,7 @@ test_expect_success 'setup a submodule tree' ' submodule.sub2 submodule.foo2 && git config -f .gitmodules --rename-section \ submodule.sub3 submodule.foo3 && - git add .gitmodules + git add .gitmodules && test_tick && git commit -m "submodules" && git submodule init sub1 && diff --git a/t/t7508-status.sh b/t/t7508-status.sh index a72fe3ae64..9c14b853c0 100755 --- a/t/t7508-status.sh +++ b/t/t7508-status.sh @@ -808,24 +808,38 @@ test_expect_success POSIXPERM 'status succeeds in a read-only repository' ' (exit $status) ' +(cd sm && echo > bar && git add bar && git commit -q -m 'Add bar' && cd .. && git add sm) +new_head=$(cd sm && git rev-parse --short=7 --verify HEAD) +touch .gitmodules + cat > expect << EOF # On branch master +# Changes to be committed: +# (use "git reset HEAD <file>..." to unstage) +# +# modified: sm +# # Changed but not updated: # (use "git add <file>..." to update what will be committed) # (use "git checkout -- <file>..." to discard changes in working directory) # # modified: dir1/modified # +# Submodule changes to be committed: +# +# * sm $head...$new_head (1): +# > Add bar +# # Untracked files: # (use "git add <file>..." 
to include in what will be committed) # +# .gitmodules # dir1/untracked # dir2/modified # dir2/untracked # expect # output # untracked -no changes added to commit (use "git add" and/or "git commit -a") EOF test_expect_success '--ignore-submodules=untracked suppresses submodules with untracked content' ' @@ -834,19 +848,89 @@ test_expect_success '--ignore-submodules=untracked suppresses submodules with un test_cmp expect output ' +test_expect_success '.gitmodules ignore=untracked suppresses submodules with untracked content' ' + git config diff.ignoreSubmodules dirty && + git status >output && + test_cmp expect output && + git config --add -f .gitmodules submodule.subname.ignore untracked && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname && + git config --unset diff.ignoreSubmodules +' + +test_expect_success '.git/config ignore=untracked suppresses submodules with untracked content' ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore untracked && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config --remove-section -f .gitmodules submodule.subname +' + test_expect_success '--ignore-submodules=dirty suppresses submodules with untracked content' ' git status --ignore-submodules=dirty > output && test_cmp expect output ' +test_expect_success '.gitmodules ignore=dirty suppresses submodules with untracked content' ' + git config diff.ignoreSubmodules dirty && + git status >output && + ! 
test -s actual && + git config --add -f .gitmodules submodule.subname.ignore dirty && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname && + git config --unset diff.ignoreSubmodules +' + +test_expect_success '.git/config ignore=dirty suppresses submodules with untracked content' ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore dirty && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + test_expect_success '--ignore-submodules=dirty suppresses submodules with modified content' ' echo modified > sm/foo && git status --ignore-submodules=dirty > output && test_cmp expect output ' +test_expect_success '.gitmodules ignore=dirty suppresses submodules with modified content' ' + git config --add -f .gitmodules submodule.subname.ignore dirty && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_success '.git/config ignore=dirty suppresses submodules with modified content' ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore dirty && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + cat > expect << EOF # On branch master +# Changes to be committed: +# (use "git reset HEAD <file>..." 
to unstage) +# +# modified: sm +# # Changed but not updated: # (use "git add <file>..." to update what will be committed) # (use "git checkout -- <file>..." to discard changes in working directory) @@ -855,16 +939,21 @@ cat > expect << EOF # modified: dir1/modified # modified: sm (modified content) # +# Submodule changes to be committed: +# +# * sm $head...$new_head (1): +# > Add bar +# # Untracked files: # (use "git add <file>..." to include in what will be committed) # +# .gitmodules # dir1/untracked # dir2/modified # dir2/untracked # expect # output # untracked -no changes added to commit (use "git add" and/or "git commit -a") EOF test_expect_success "--ignore-submodules=untracked doesn't suppress submodules with modified content" ' @@ -872,10 +961,34 @@ test_expect_success "--ignore-submodules=untracked doesn't suppress submodules w test_cmp expect output ' +test_expect_success ".gitmodules ignore=untracked doesn't suppress submodules with modified content" ' + git config --add -f .gitmodules submodule.subname.ignore untracked && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_success ".git/config ignore=untracked doesn't suppress submodules with modified content" ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore untracked && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + head2=$(cd sm && git commit -q -m "2nd commit" foo && git rev-parse --short=7 --verify HEAD) cat > expect << EOF # On branch master +# Changes to be committed: +# (use "git reset HEAD <file>..." 
to unstage) +# +# modified: sm +# # Changed but not updated: # (use "git add <file>..." to update what will be committed) # (use "git checkout -- <file>..." to discard changes in working directory) @@ -883,21 +996,26 @@ cat > expect << EOF # modified: dir1/modified # modified: sm (new commits) # +# Submodule changes to be committed: +# +# * sm $head...$new_head (1): +# > Add bar +# # Submodules changed but not updated: # -# * sm $head...$head2 (1): +# * sm $new_head...$head2 (1): # > 2nd commit # # Untracked files: # (use "git add <file>..." to include in what will be committed) # +# .gitmodules # dir1/untracked # dir2/modified # dir2/untracked # expect # output # untracked -no changes added to commit (use "git add" and/or "git commit -a") EOF test_expect_success "--ignore-submodules=untracked doesn't suppress submodule summary" ' @@ -905,10 +1023,47 @@ test_expect_success "--ignore-submodules=untracked doesn't suppress submodule su test_cmp expect output ' +test_expect_success ".gitmodules ignore=untracked doesn't suppress submodule summary" ' + git config --add -f .gitmodules submodule.subname.ignore untracked && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_success ".git/config ignore=untracked doesn't suppress submodule summary" ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore untracked && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + test_expect_success "--ignore-submodules=dirty doesn't suppress submodule summary" ' git status --ignore-submodules=dirty > output && test_cmp expect output ' +test_expect_success 
".gitmodules ignore=dirty doesn't suppress submodule summary" ' + git config --add -f .gitmodules submodule.subname.ignore dirty && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_success ".git/config ignore=dirty doesn't suppress submodule summary" ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore dirty && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' cat > expect << EOF # On branch master @@ -921,6 +1076,7 @@ cat > expect << EOF # Untracked files: # (use "git add <file>..." to include in what will be committed) # +# .gitmodules # dir1/untracked # dir2/modified # dir2/untracked @@ -935,4 +1091,23 @@ test_expect_success "--ignore-submodules=all suppresses submodule summary" ' test_cmp expect output ' +test_expect_failure '.gitmodules ignore=all suppresses submodule summary' ' + git config --add -f .gitmodules submodule.subname.ignore all && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_failure '.git/config ignore=all suppresses submodule summary' ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore all && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + test_done diff --git 
a/t/t7509-commit.sh b/t/t7509-commit.sh index 3ea33db6c7..643ab03f99 100755 --- a/t/t7509-commit.sh +++ b/t/t7509-commit.sh @@ -111,7 +111,7 @@ test_expect_success '--amend option with empty author' ' test_when_finished "git checkout Initial" && echo "Empty author test" >>foo && test_tick && - ! git commit -a -m "empty author" --amend 2>err && + test_must_fail git commit -a -m "empty author" --amend 2>err && grep "empty ident" err ' @@ -125,7 +125,7 @@ test_expect_success '--amend option with missing author' ' test_when_finished "git checkout Initial" && echo "Missing author test" >>foo && test_tick && - ! git commit -a -m "malformed author" --amend 2>err && + test_must_fail git commit -a -m "malformed author" --amend 2>err && grep "empty ident" err ' diff --git a/t/t7600-merge.sh b/t/t7600-merge.sh index cde8390c1b..b4f40e4c3a 100755 --- a/t/t7600-merge.sh +++ b/t/t7600-merge.sh @@ -5,189 +5,103 @@ test_description='git merge -Testing basic merge operations/option parsing.' +Testing basic merge operations/option parsing. + +! [c0] commit 0 + ! [c1] commit 1 + ! [c2] commit 2 + ! [c3] commit 3 + ! [c4] c4 + ! [c5] c5 + ! [c6] c6 + * [master] Merge commit 'c1' +-------- + - [master] Merge commit 'c1' + + * [c1] commit 1 + + [c6] c6 + + [c5] c5 + ++ [c4] c4 + ++++ [c3] commit 3 + + [c2] commit 2 ++++++++* [c0] commit 0 +' . 
./test-lib.sh -cat >file <<EOF -1 -2 -3 -4 -5 -6 -7 -8 -9 -EOF - -cat >file.1 <<EOF -1 X -2 -3 -4 -5 -6 -7 -8 -9 -EOF - -cat >file.5 <<EOF -1 -2 -3 -4 -5 X -6 -7 -8 -9 -EOF - -cat >file.9 <<EOF -1 -2 -3 -4 -5 -6 -7 -8 -9 X -EOF - -cat >result.1 <<EOF -1 X -2 -3 -4 -5 -6 -7 -8 -9 -EOF - -cat >result.1-5 <<EOF -1 X -2 -3 -4 -5 X -6 -7 -8 -9 -EOF - -cat >result.1-5-9 <<EOF -1 X -2 -3 -4 -5 X -6 -7 -8 -9 X -EOF - -create_merge_msgs() { - echo "Merge commit 'c2'" >msg.1-5 && - echo "Merge commit 'c2'; commit 'c3'" >msg.1-5-9 && - echo "Squashed commit of the following:" >squash.1 && - echo >>squash.1 && - git log --no-merges ^HEAD c1 >>squash.1 && - echo "Squashed commit of the following:" >squash.1-5 && - echo >>squash.1-5 && - git log --no-merges ^HEAD c2 >>squash.1-5 && - echo "Squashed commit of the following:" >squash.1-5-9 && - echo >>squash.1-5-9 && - git log --no-merges ^HEAD c2 c3 >>squash.1-5-9 && - echo > msg.nolog && - echo "* commit 'c3':" >msg.log && - echo " commit 3" >>msg.log && - echo >>msg.log -} - -verify_diff() { - if ! 
test_cmp "$1" "$2" - then - echo "$3" - false - fi -} - -verify_merge() { - verify_diff "$2" "$1" "[OOPS] bad merge result" && - if test $(git ls-files -u | wc -l) -gt 0 - then - echo "[OOPS] unmerged files" - false - fi && - if test_must_fail git diff --exit-code - then - echo "[OOPS] working tree != index" - false - fi && - if test -n "$3" - then - git show -s --pretty=format:%s HEAD >msg.act && - verify_diff "$3" msg.act "[OOPS] bad merge message" - fi -} - -verify_head() { - if test "$1" != "$(git rev-parse HEAD)" - then - echo "[OOPS] HEAD != $1" - false - fi -} - -verify_parents() { - i=1 - while test $# -gt 0 - do - if test "$1" != "$(git rev-parse HEAD^$i)" +test_expect_success 'set up test data and helpers' ' + printf "%s\n" 1 2 3 4 5 6 7 8 9 >file && + printf "%s\n" "1 X" 2 3 4 5 6 7 8 9 >file.1 && + printf "%s\n" 1 2 3 4 "5 X" 6 7 8 9 >file.5 && + printf "%s\n" 1 2 3 4 5 6 7 8 "9 X" >file.9 && + printf "%s\n" "1 X" 2 3 4 5 6 7 8 9 >result.1 && + printf "%s\n" "1 X" 2 3 4 "5 X" 6 7 8 9 >result.1-5 && + printf "%s\n" "1 X" 2 3 4 "5 X" 6 7 8 "9 X" >result.1-5-9 && + + create_merge_msgs() { + echo "Merge commit '\''c2'\''" >msg.1-5 && + echo "Merge commit '\''c2'\''; commit '\''c3'\''" >msg.1-5-9 && + { + echo "Squashed commit of the following:" && + echo && + git log --no-merges ^HEAD c1 + } >squash.1 && + { + echo "Squashed commit of the following:" && + echo && + git log --no-merges ^HEAD c2 + } >squash.1-5 && + { + echo "Squashed commit of the following:" && + echo && + git log --no-merges ^HEAD c2 c3 + } >squash.1-5-9 && + echo >msg.nolog && + { + echo "* commit '\''c3'\'':" && + echo " commit 3" && + echo + } >msg.log + } && + + verify_merge() { + test_cmp "$2" "$1" && + git update-index --refresh && + git diff --exit-code && + if test -n "$3" then - echo "[OOPS] HEAD^$i != $1" - return 1 + git show -s --pretty=format:%s HEAD >msg.act && + test_cmp "$3" msg.act fi - i=$(expr $i + 1) - shift - done -} - -verify_mergeheads() { - i=1 - if ! 
test -f .git/MERGE_HEAD - then - echo "[OOPS] MERGE_HEAD is missing" - false - fi && - while test $# -gt 0 - do - head=$(head -n $i .git/MERGE_HEAD | sed -ne \$p) - if test "$1" != "$head" - then - echo "[OOPS] MERGE_HEAD $i != $1" + } && + + verify_head() { + echo "$1" >head.expected && + git rev-parse HEAD >head.actual && + test_cmp head.expected head.actual + } && + + verify_parents() { + printf "%s\n" "$@" >parents.expected && + >parents.actual && + i=1 && + while test $i -le $# + do + git rev-parse HEAD^$i >>parents.actual && + i=$(expr $i + 1) || return 1 - fi - i=$(expr $i + 1) - shift - done -} + done && + test_cmp parents.expected parents.actual + } && -verify_no_mergehead() { - if test -f .git/MERGE_HEAD - then - echo "[OOPS] MERGE_HEAD exists" - false - fi -} + verify_mergeheads() { + printf "%s\n" "$@" >mergehead.expected && + test_cmp mergehead.expected .git/MERGE_HEAD + } && + verify_no_mergehead() { + ! test -e .git/MERGE_HEAD + } +' test_expect_success 'setup' ' git add file && @@ -219,7 +133,7 @@ test_expect_success 'setup' ' create_merge_msgs ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'test option parsing' ' test_must_fail git merge -$ c1 && @@ -235,13 +149,19 @@ test_expect_success 'reject non-strategy with a git-merge-foo name' ' ' test_expect_success 'merge c0 with c1' ' + echo "OBJID HEAD@{0}: merge c1: Fast-forward" >reflog.expected && + git reset --hard c0 && git merge c1 && verify_merge file result.1 && - verify_head "$c1" + verify_head "$c1" && + + git reflog -1 >reflog.actual && + sed "s/$_x05[0-9a-f]*/OBJID/g" reflog.actual >reflog.fuzzy && + test_cmp reflog.expected reflog.fuzzy ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c0 with c1 with --ff-only' ' git reset --hard c0 && @@ -251,7 +171,28 @@ test_expect_success 'merge c0 with c1 with --ff-only' ' verify_head "$c1" ' -test_debug 'gitk --all' +test_debug 'git 
log --graph --decorate --oneline --all' + +test_expect_success 'merge from unborn branch' ' + git checkout -f master && + test_might_fail git branch -D kid && + + echo "OBJID HEAD@{0}: initial pull" >reflog.expected && + + git checkout --orphan kid && + test_when_finished "git checkout -f master" && + git rm -fr . && + test_tick && + git merge --ff-only c1 && + verify_merge file result.1 && + verify_head "$c1" && + + git reflog -1 >reflog.actual && + sed "s/$_x05[0-9a-f][0-9a-f]/OBJID/g" reflog.actual >reflog.fuzzy && + test_cmp reflog.expected reflog.fuzzy +' + +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2' ' git reset --hard c1 && @@ -261,7 +202,7 @@ test_expect_success 'merge c1 with c2' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 and c3' ' git reset --hard c1 && @@ -271,7 +212,7 @@ test_expect_success 'merge c1 with c2 and c3' ' verify_parents $c1 $c2 $c3 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'failing merges with --ff-only' ' git reset --hard c1 && @@ -288,7 +229,7 @@ test_expect_success 'merge c0 with c1 (no-commit)' ' verify_head $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (no-commit)' ' git reset --hard c1 && @@ -298,7 +239,7 @@ test_expect_success 'merge c1 with c2 (no-commit)' ' verify_mergeheads $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 and c3 (no-commit)' ' git reset --hard c1 && @@ -308,7 +249,7 @@ test_expect_success 'merge c1 with c2 and c3 (no-commit)' ' verify_mergeheads $c2 $c3 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c0 with c1 (squash)' ' git reset --hard c0 && @@ -316,10 +257,10 @@ 
test_expect_success 'merge c0 with c1 (squash)' ' verify_merge file result.1 && verify_head $c0 && verify_no_mergehead && - verify_diff squash.1 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c0 with c1 (squash, ff-only)' ' git reset --hard c0 && @@ -327,10 +268,10 @@ test_expect_success 'merge c0 with c1 (squash, ff-only)' ' verify_merge file result.1 && verify_head $c0 && verify_no_mergehead && - verify_diff squash.1 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (squash)' ' git reset --hard c1 && @@ -338,17 +279,17 @@ test_expect_success 'merge c1 with c2 (squash)' ' verify_merge file result.1-5 && verify_head $c1 && verify_no_mergehead && - verify_diff squash.1-5 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1-5 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'unsuccesful merge of c1 with c2 (squash, ff-only)' ' git reset --hard c1 && test_must_fail git merge --squash --ff-only c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 and c3 (squash)' ' git reset --hard c1 && @@ -356,10 +297,10 @@ test_expect_success 'merge c1 with c2 and c3 (squash)' ' verify_merge file result.1-5-9 && verify_head $c1 && verify_no_mergehead && - verify_diff squash.1-5-9 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1-5-9 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (no-commit in config)' ' git reset --hard c1 && @@ -370,7 +311,7 @@ test_expect_success 'merge c1 with c2 (no-commit in config)' ' verify_mergeheads $c2 ' 
-test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (squash in config)' ' git reset --hard c1 && @@ -379,10 +320,10 @@ test_expect_success 'merge c1 with c2 (squash in config)' ' verify_merge file result.1-5 && verify_head $c1 && verify_no_mergehead && - verify_diff squash.1-5 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1-5 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'override config option -n with --summary' ' git reset --hard c1 && @@ -412,7 +353,7 @@ test_expect_success 'override config option -n with --stat' ' fi ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'override config option --stat' ' git reset --hard c1 && @@ -428,7 +369,7 @@ test_expect_success 'override config option --stat' ' fi ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (override --no-commit)' ' git reset --hard c1 && @@ -439,7 +380,7 @@ test_expect_success 'merge c1 with c2 (override --no-commit)' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (override --squash)' ' git reset --hard c1 && @@ -450,7 +391,7 @@ test_expect_success 'merge c1 with c2 (override --squash)' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c0 with c1 (no-ff)' ' git reset --hard c0 && @@ -461,7 +402,7 @@ test_expect_success 'merge c0 with c1 (no-ff)' ' verify_parents $c0 $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'combining --squash and --no-ff is refused' ' test_must_fail git merge --squash --no-ff c1 && @@ -485,20 +426,20 @@ test_expect_success 'merge log message' ' git reset --hard 
c0 && git merge --no-log c2 && git show -s --pretty=format:%b HEAD >msg.act && - verify_diff msg.nolog msg.act "[OOPS] bad merge log message" && + test_cmp msg.nolog msg.act && git merge --log c3 && git show -s --pretty=format:%b HEAD >msg.act && - verify_diff msg.log msg.act "[OOPS] bad merge log message" && + test_cmp msg.log msg.act && git reset --hard HEAD^ && git config merge.log yes && git merge c3 && git show -s --pretty=format:%b HEAD >msg.act && - verify_diff msg.log msg.act "[OOPS] bad merge log message" + test_cmp msg.log msg.act ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c0, c2, c0, and c1' ' git reset --hard c1 && @@ -509,7 +450,7 @@ test_expect_success 'merge c1 with c0, c2, c0, and c1' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c0, c2, c0, and c1' ' git reset --hard c1 && @@ -520,7 +461,7 @@ test_expect_success 'merge c1 with c0, c2, c0, and c1' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c1 and c2' ' git reset --hard c1 && @@ -531,7 +472,7 @@ test_expect_success 'merge c1 with c1 and c2' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge fast-forward in a dirty tree' ' git reset --hard c0 && @@ -541,16 +482,16 @@ test_expect_success 'merge fast-forward in a dirty tree' ' git merge c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'in-index merge' ' git reset --hard c0 && - git merge --no-ff -s resolve c1 > out && + git merge --no-ff -s resolve c1 >out && grep "Wonderful." 
out && verify_parents $c0 $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'refresh the index before merging' ' git reset --hard c1 && @@ -558,31 +499,39 @@ test_expect_success 'refresh the index before merging' ' git merge c3 ' -cat >expected <<EOF -Merge branch 'c5' (early part) +cat >expected.branch <<\EOF +Merge branch 'c5-branch' (early part) +EOF +cat >expected.tag <<\EOF +Merge commit 'c5~1' EOF test_expect_success 'merge early part of c2' ' git reset --hard c3 && - echo c4 > c4.c && + echo c4 >c4.c && git add c4.c && git commit -m c4 && git tag c4 && - echo c5 > c5.c && + echo c5 >c5.c && git add c5.c && git commit -m c5 && git tag c5 && git reset --hard c3 && - echo c6 > c6.c && + echo c6 >c6.c && git add c6.c && git commit -m c6 && git tag c6 && + git branch -f c5-branch c5 && + git merge c5-branch~1 && + git show -s --pretty=format:%s HEAD >actual.branch && + git reset --keep HEAD^ && git merge c5~1 && - git show -s --pretty=format:%s HEAD > actual && - test_cmp actual expected + git show -s --pretty=format:%s HEAD >actual.tag && + test_cmp expected.branch actual.branch && + test_cmp expected.tag actual.tag ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge --no-ff --no-commit && commit' ' git reset --hard c0 && @@ -591,13 +540,13 @@ test_expect_success 'merge --no-ff --no-commit && commit' ' verify_parents $c0 $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'amending no-ff merge commit' ' EDITOR=: git commit --amend && verify_parents $c0 $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_done diff --git a/t/t7606-merge-custom.sh b/t/t7606-merge-custom.sh index 52a451dd57..8e8c4d7246 100755 --- a/t/t7606-merge-custom.sh +++ b/t/t7606-merge-custom.sh @@ -1,49 +1,93 @@ #!/bin/sh -test_description='git merge +test_description="git merge 
-Testing a custom strategy.' +Testing a custom strategy. + +* (HEAD, master) Merge commit 'c3' +|\ +| * (tag: c3) c3 +* | (tag: c1) c1 +|/ +| * tag: c2) c2 +|/ +* (tag: c0) c0 +" . ./test-lib.sh -cat >git-merge-theirs <<EOF -#!$SHELL_PATH -eval git read-tree --reset -u \\\$\$# -EOF -chmod +x git-merge-theirs -PATH=.:$PATH -export PATH +test_expect_success 'set up custom strategy' ' + cat >git-merge-theirs <<-EOF && + #!$SHELL_PATH + eval git read-tree --reset -u \\\$\$# + EOF + + chmod +x git-merge-theirs && + PATH=.:$PATH && + export PATH +' test_expect_success 'setup' ' - echo c0 >c0.c && - git add c0.c && - git commit -m c0 && - git tag c0 && - echo c1 >c1.c && - git add c1.c && - git commit -m c1 && - git tag c1 && - git reset --hard c0 && + test_commit c0 c0.c && + test_commit c1 c1.c && + git reset --keep c0 && echo c1c1 >c1.c && - echo c2 >c2.c && - git add c1.c c2.c && - git commit -m c2 && - git tag c2 + git add c1.c && + test_commit c2 c2.c && + git reset --keep c0 && + test_commit c3 c3.c ' test_expect_success 'merge c2 with a custom strategy' ' git reset --hard c1 && + + git rev-parse c1 >head.old && + git rev-parse c2 >second-parent.expected && + git rev-parse c2^{tree} >tree.expected && git merge -s theirs c2 && - test "$(git rev-parse c1)" != "$(git rev-parse HEAD)" && - test "$(git rev-parse c1)" = "$(git rev-parse HEAD^1)" && - test "$(git rev-parse c2)" = "$(git rev-parse HEAD^2)" && - test "$(git rev-parse c2^{tree})" = "$(git rev-parse HEAD^{tree})" && + + git rev-parse HEAD >head.new && + git rev-parse HEAD^1 >first-parent && + git rev-parse HEAD^2 >second-parent && + git rev-parse HEAD^{tree} >tree && + git update-index --refresh && git diff --exit-code && git diff --exit-code c2 HEAD && git diff --exit-code c2 && + + ! 
test_cmp head.old head.new && + test_cmp head.old first-parent && + test_cmp second-parent.expected second-parent && + test_cmp tree.expected tree && test -f c0.c && grep c1c1 c1.c && test -f c2.c ' +test_expect_success 'trivial merge with custom strategy' ' + git reset --hard c1 && + + git rev-parse c1 >head.old && + git rev-parse c3 >second-parent.expected && + git rev-parse c3^{tree} >tree.expected && + git merge -s theirs c3 && + + git rev-parse HEAD >head.new && + git rev-parse HEAD^1 >first-parent && + git rev-parse HEAD^2 >second-parent && + git rev-parse HEAD^{tree} >tree && + git update-index --refresh && + git diff --exit-code && + git diff --exit-code c3 HEAD && + git diff --exit-code c3 && + + ! test_cmp head.old head.new && + test_cmp head.old first-parent && + test_cmp second-parent.expected second-parent && + test_cmp tree.expected tree && + test -f c0.c && + ! test -e c1.c && + test -f c3.c +' + test_done diff --git a/t/t7607-merge-overwrite.sh b/t/t7607-merge-overwrite.sh index 49f4e1599a..d82349a6a8 100755 --- a/t/t7607-merge-overwrite.sh +++ b/t/t7607-merge-overwrite.sh @@ -31,7 +31,7 @@ test_expect_success 'setup' ' test_expect_success 'will not overwrite untracked file' ' git reset --hard c1 && cat important > c2.c && - ! git merge c2 && + test_must_fail git merge c2 && test_cmp important c2.c ' @@ -39,7 +39,7 @@ test_expect_success 'will not overwrite new file' ' git reset --hard c1 && cat important > c2.c && git add c2.c && - ! git merge c2 && + test_must_fail git merge c2 && test_cmp important c2.c ' @@ -48,7 +48,7 @@ test_expect_success 'will not overwrite staged changes' ' cat important > c2.c && git add c2.c && rm c2.c && - ! git merge c2 && + test_must_fail git merge c2 && git checkout c2.c && test_cmp important c2.c ' @@ -58,7 +58,7 @@ test_expect_success 'will not overwrite removed file' ' git rm c1.c && git commit -m "rm c1.c" && cat important > c1.c && - ! 
git merge c1a && + test_must_fail git merge c1a && test_cmp important c1.c ' @@ -68,7 +68,7 @@ test_expect_success 'will not overwrite re-added file' ' git commit -m "rm c1.c" && cat important > c1.c && git add c1.c && - ! git merge c1a && + test_must_fail git merge c1a && test_cmp important c1.c ' @@ -79,7 +79,7 @@ test_expect_success 'will not overwrite removed file with staged changes' ' cat important > c1.c && git add c1.c && rm c1.c && - ! git merge c1a && + test_must_fail git merge c1a && git checkout c1.c && test_cmp important c1.c ' diff --git a/t/t7609-merge-co-error-msgs.sh b/t/t7609-merge-co-error-msgs.sh new file mode 100755 index 0000000000..114d2bd785 --- /dev/null +++ b/t/t7609-merge-co-error-msgs.sh @@ -0,0 +1,133 @@ +#!/bin/sh + +test_description='unpack-trees error messages' + +. ./test-lib.sh + + +test_expect_success 'setup' ' + echo one >one && + git add one && + git commit -a -m First && + + git checkout -b branch && + echo two >two && + echo three >three && + echo four >four && + echo five >five && + git add two three four five && + git commit -m Second && + + git checkout master && + echo other >two && + echo other >three && + echo other >four && + echo other >five +' + +cat >expect <<\EOF +error: The following untracked working tree files would be overwritten by merge: + two + three + four + five +Please move or remove them before you can merge. +EOF + +test_expect_success 'untracked files overwritten by merge (fast and non-fast forward)' ' + test_must_fail git merge branch 2>out && + test_cmp out expect && + git commit --allow-empty -m empty && + ( + GIT_MERGE_VERBOSITY=0 && + export GIT_MERGE_VERBOSITY && + test_must_fail git merge branch 2>out2 + ) && + test_cmp out2 expect && + git reset --hard HEAD^ +' + +cat >expect <<\EOF +error: Your local changes to the following files would be overwritten by merge: + two + three + four +Please, commit your changes or stash them before you can merge. 
+error: The following untracked working tree files would be overwritten by merge: + five +Please move or remove them before you can merge. +EOF + +test_expect_success 'untracked files or local changes ovewritten by merge' ' + git add two && + git add three && + git add four && + test_must_fail git merge branch 2>out && + test_cmp out expect +' + +cat >expect <<\EOF +error: Your local changes to the following files would be overwritten by checkout: + rep/two + rep/one +Please, commit your changes or stash them before you can switch branches. +EOF + +test_expect_success 'cannot switch branches because of local changes' ' + git add five && + mkdir rep && + echo one >rep/one && + echo two >rep/two && + git add rep/one rep/two && + git commit -m Fourth && + git checkout master && + echo uno >rep/one && + echo dos >rep/two && + test_must_fail git checkout branch 2>out && + test_cmp out expect +' + +cat >expect <<\EOF +error: Your local changes to the following files would be overwritten by checkout: + rep/two + rep/one +Please, commit your changes or stash them before you can switch branches. 
+EOF + +test_expect_success 'not uptodate file porcelain checkout error' ' + git add rep/one rep/two && + test_must_fail git checkout branch 2>out && + test_cmp out expect +' + +cat >expect <<\EOF +error: Updating the following directories would lose untracked files in it: + rep2 + rep + +EOF + +test_expect_success 'not_uptodate_dir porcelain checkout error' ' + git init uptodate && + cd uptodate && + mkdir rep && + mkdir rep2 && + touch rep/foo && + touch rep2/foo && + git add rep/foo rep2/foo && + git commit -m init && + git checkout -b branch && + git rm rep -r && + git rm rep2 -r && + >rep && + >rep2 && + git add rep rep2&& + git commit -m "added test as a file" && + git checkout master && + >rep/untracked-file && + >rep2/untracked-file && + test_must_fail git checkout branch 2>out && + test_cmp out ../expect +' + +test_done diff --git a/t/t7610-mergetool.sh b/t/t7610-mergetool.sh index e768c3eb2d..3bd74042ef 100755 --- a/t/t7610-mergetool.sh +++ b/t/t7610-mergetool.sh @@ -14,6 +14,7 @@ Testing basic merge tool invocation' # running mergetool test_expect_success 'setup' ' + git config rerere.enabled true && echo master >file1 && mkdir subdir && echo master sub >subdir/file3 && @@ -67,23 +68,47 @@ test_expect_success 'mergetool crlf' ' ' test_expect_success 'mergetool in subdir' ' - git checkout -b test3 branch1 - cd subdir && ( - test_must_fail git merge master >/dev/null 2>&1 && - ( yes "" | git mergetool file3 >/dev/null 2>&1 ) && - test "$(cat file3)" = "master new sub" ) + git checkout -b test3 branch1 && + ( + cd subdir && + test_must_fail git merge master >/dev/null 2>&1 && + ( yes "" | git mergetool file3 >/dev/null 2>&1 ) && + test "$(cat file3)" = "master new sub" + ) ' -# We can't merge files from parent directories when running mergetool -# from a subdir. Is this a bug? 
-# -#test_expect_failure 'mergetool in subdir' ' -# cd subdir && ( -# ( yes "" | git mergetool ../file1 >/dev/null 2>&1 ) && -# ( yes "" | git mergetool ../file2 >/dev/null 2>&1 ) && -# test "$(cat ../file1)" = "master updated" && -# test "$(cat ../file2)" = "master new" && -# git commit -m "branch1 resolved with mergetool - subdir" ) -#' +test_expect_success 'mergetool on file in parent dir' ' + ( + cd subdir && + ( yes "" | git mergetool ../file1 >/dev/null 2>&1 ) && + ( yes "" | git mergetool ../file2 >/dev/null 2>&1 ) && + test "$(cat ../file1)" = "master updated" && + test "$(cat ../file2)" = "master new" && + git commit -m "branch1 resolved with mergetool - subdir" + ) +' + +test_expect_success 'mergetool skips autoresolved' ' + git checkout -b test4 branch1 && + test_must_fail git merge master && + test -n "$(git ls-files -u)" && + output="$(git mergetool --no-prompt)" && + test "$output" = "No files need merging" && + git reset --hard +' + +test_expect_success 'mergetool merges all from subdir' ' + ( + cd subdir && + git config rerere.enabled false && + test_must_fail git merge master && + git mergetool --no-prompt && + test "$(cat ../file1)" = "master updated" && + test "$(cat ../file2)" = "master new" && + test "$(cat file3)" = "master new sub" && + git add ../file1 ../file2 file3 && + git commit -m "branch2 resolved by mergetool from subdir" + ) +' test_done diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 8a6322765c..023f225a4b 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -65,7 +65,7 @@ do test_expect_success "grep -w $L (w)" ' : >expected && - ! git grep -n -w -e "^w" >actual && + test_must_fail git grep -n -w -e "^w" >actual && test_cmp expected actual ' diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh new file mode 100755 index 0000000000..a713dfc50b --- /dev/null +++ b/t/t9010-svn-fe.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +test_description='check svn dumpfile importer' + +. 
./lib-git-svn.sh + +test_dump() { + label=$1 + dump=$2 + test_expect_success "$dump" ' + svnadmin create "$label-svn" && + svnadmin load "$label-svn" < "$TEST_DIRECTORY/$dump" && + svn_cmd export "file://$PWD/$label-svn" "$label-svnco" && + git init "$label-git" && + test-svn-fe "$TEST_DIRECTORY/$dump" >"$label.fe" && + ( + cd "$label-git" && + git fast-import < ../"$label.fe" + ) && + ( + cd "$label-svnco" && + git init && + git add . && + git fetch "../$label-git" master && + git diff --exit-code FETCH_HEAD + ) + ' +} + +test_dump simple t9135/svn.dump + +test_done diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh index 13766ab160..d5adae640b 100755 --- a/t/t9100-git-svn-basic.sh +++ b/t/t9100-git-svn-basic.sh @@ -271,6 +271,17 @@ test_expect_success 'able to dcommit to a subdirectory' " test -z \"\`git diff refs/heads/my-bar refs/remotes/bar\`\" " +test_expect_success 'dcommit should not fail with a touched file' ' + test_commit "commit-new-file-foo2" foo2 && + test-chmtime =-60 foo && + git svn dcommit +' + +test_expect_success 'rebase should not fail with a touched file' ' + test-chmtime =-60 foo && + git svn rebase +' + test_expect_success 'able to set-tree to a subdirectory' " echo cba > d && git update-index d && diff --git a/t/t9130-git-svn-authors-file.sh b/t/t9130-git-svn-authors-file.sh index 134411e0a5..3c4f31925f 100755 --- a/t/t9130-git-svn-authors-file.sh +++ b/t/t9130-git-svn-authors-file.sh @@ -20,7 +20,7 @@ test_expect_success 'setup svnrepo' ' ' test_expect_success 'start import with incomplete authors file' ' - ! git svn clone --authors-file=svn-authors "$svnrepo" x + test_must_fail git svn clone --authors-file=svn-authors "$svnrepo" x ' test_expect_success 'imported 2 revisions successfully' ' @@ -63,7 +63,7 @@ test_expect_success 'authors-file against globs' ' ' test_expect_success 'fetch fails on ee' ' - ( cd aa-work && ! 
git svn fetch --authors-file=../svn-authors ) + ( cd aa-work && test_must_fail git svn fetch --authors-file=../svn-authors ) ' tmp_config_get () { diff --git a/t/t9139-git-svn-non-utf8-commitencoding.sh b/t/t9139-git-svn-non-utf8-commitencoding.sh index f337959ccc..22d80b0be2 100755 --- a/t/t9139-git-svn-non-utf8-commitencoding.sh +++ b/t/t9139-git-svn-non-utf8-commitencoding.sh @@ -39,7 +39,7 @@ do ( cd $H && git config --unset i18n.commitencoding && - ! git svn dcommit + test_must_fail git svn dcommit ) ' done diff --git a/t/t9140-git-svn-reset.sh b/t/t9140-git-svn-reset.sh index 0735526d4b..e855904629 100755 --- a/t/t9140-git-svn-reset.sh +++ b/t/t9140-git-svn-reset.sh @@ -41,7 +41,7 @@ test_expect_success 'modify hidden file in SVN repo' ' test_expect_success 'fetch fails on modified hidden file' ' ( cd g && git svn find-rev refs/remotes/git-svn > ../expect && - ! git svn fetch 2> ../errors && + test_must_fail git svn fetch 2> ../errors && git svn find-rev refs/remotes/git-svn > ../expect2 ) && fgrep "not found in commit" errors && test_cmp expect expect2 diff --git a/t/t9155-git-svn-fetch-deleted-tag.sh b/t/t9155-git-svn-fetch-deleted-tag.sh new file mode 100755 index 0000000000..a486a98f84 --- /dev/null +++ b/t/t9155-git-svn-fetch-deleted-tag.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +test_description='git svn fetch deleted tag' + +. 
./lib-git-svn.sh + +test_expect_success 'setup svn repo' ' + mkdir -p import/trunk/subdir && + mkdir -p import/branches && + mkdir -p import/tags && + echo "base" >import/trunk/subdir/file && + svn_cmd import -m "import for git svn" import "$svnrepo" && + rm -rf import && + + svn_cmd mkdir -m "create mybranch directory" "$svnrepo/branches/mybranch" && + svn_cmd cp -m "create branch mybranch" "$svnrepo/trunk" "$svnrepo/branches/mybranch/trunk" && + + svn_cmd co "$svnrepo/trunk" svn_project && + (cd svn_project && + echo "trunk change" >>subdir/file && + svn_cmd ci -m "trunk change" subdir/file && + + svn_cmd switch "$svnrepo/branches/mybranch/trunk" && + echo "branch change" >>subdir/file && + svn_cmd ci -m "branch change" subdir/file + ) && + + svn_cmd cp -m "create mytag attempt 1" -r5 "$svnrepo/trunk/subdir" "$svnrepo/tags/mytag" && + svn_cmd rm -m "delete mytag attempt 1" "$svnrepo/tags/mytag" && + svn_cmd cp -m "create mytag attempt 2" -r5 "$svnrepo/branches/mybranch/trunk/subdir" "$svnrepo/tags/mytag" +' + +test_expect_success 'fetch deleted tags from same revision with checksum error' ' + git svn init --stdlayout "$svnrepo" git_project && + cd git_project && + git svn fetch && + + git diff --exit-code mybranch:trunk/subdir/file tags/mytag:file && + git diff --exit-code master:subdir/file tags/mytag^:file +' + +test_done diff --git a/t/t9156-git-svn-fetch-deleted-tag-2.sh b/t/t9156-git-svn-fetch-deleted-tag-2.sh new file mode 100755 index 0000000000..5ce7e2f3b0 --- /dev/null +++ b/t/t9156-git-svn-fetch-deleted-tag-2.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +test_description='git svn fetch deleted tag 2' + +. 
./lib-git-svn.sh + +test_expect_success 'setup svn repo' ' + mkdir -p import/branches && + mkdir -p import/tags && + mkdir -p import/trunk/subdir1 && + mkdir -p import/trunk/subdir2 && + mkdir -p import/trunk/subdir3 && + echo "file1" >import/trunk/subdir1/file && + echo "file2" >import/trunk/subdir2/file && + echo "file3" >import/trunk/subdir3/file && + svn_cmd import -m "import for git svn" import "$svnrepo" && + rm -rf import && + + svn_cmd co "$svnrepo/trunk" svn_project && + (cd svn_project && + echo "change1" >>subdir1/file && + echo "change2" >>subdir2/file && + echo "change3" >>subdir3/file && + svn_cmd ci -m "change" . + ) && + + svn_cmd cp -m "create mytag 1" -r2 "$svnrepo/trunk/subdir1" "$svnrepo/tags/mytag" && + svn_cmd rm -m "delete mytag 1" "$svnrepo/tags/mytag" && + svn_cmd cp -m "create mytag 2" -r2 "$svnrepo/trunk/subdir2" "$svnrepo/tags/mytag" && + svn_cmd rm -m "delete mytag 2" "$svnrepo/tags/mytag" && + svn_cmd cp -m "create mytag 3" -r2 "$svnrepo/trunk/subdir3" "$svnrepo/tags/mytag" +' + +test_expect_success 'fetch deleted tags from same revision with no checksum error' ' + git svn init --stdlayout "$svnrepo" git_project && + cd git_project && + git svn fetch && + + git diff --exit-code master:subdir3/file tags/mytag:file && + git diff --exit-code master:subdir2/file tags/mytag^:file && + git diff --exit-code master:subdir1/file tags/mytag^^:file +' + +test_done diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 131f032988..96d07f1833 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -166,6 +166,63 @@ test_expect_success \ test `git rev-parse --verify master:file2` \ = `git rev-parse --verify verify--import-marks:copy-of-file2`' +test_tick +mt=$(git hash-object --stdin < /dev/null) +: >input.blob +: >marks.exp +: >tree.exp + +cat >input.commit <<EOF +commit refs/heads/verify--dump-marks +committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE +data <<COMMIT +test the sparse array dumping 
routines with exponentially growing marks +COMMIT +EOF + +i=0 +l=4 +m=6 +n=7 +while test "$i" -lt 27; do + cat >>input.blob <<EOF +blob +mark :$l +data 0 +blob +mark :$m +data 0 +blob +mark :$n +data 0 +EOF + echo "M 100644 :$l l$i" >>input.commit + echo "M 100644 :$m m$i" >>input.commit + echo "M 100644 :$n n$i" >>input.commit + + echo ":$l $mt" >>marks.exp + echo ":$m $mt" >>marks.exp + echo ":$n $mt" >>marks.exp + + printf "100644 blob $mt\tl$i\n" >>tree.exp + printf "100644 blob $mt\tm$i\n" >>tree.exp + printf "100644 blob $mt\tn$i\n" >>tree.exp + + l=$(($l + $l)) + m=$(($m + $m)) + n=$(($l + $n)) + + i=$((1 + $i)) +done + +sort tree.exp > tree.exp_s + +test_expect_success 'A: export marks with large values' ' + cat input.blob input.commit | git fast-import --export-marks=marks.large && + git ls-tree refs/heads/verify--dump-marks >tree.out && + test_cmp tree.exp_s tree.out && + test_cmp marks.exp marks.large' + ### ### series B ### @@ -796,6 +853,60 @@ test_expect_success \ 'git fast-import <input && test `git rev-parse N2^{tree}` = `git rev-parse N3^{tree}`' +test_expect_success \ + 'N: copy directory by id' \ + 'cat >expect <<-\EOF && + :100755 100755 f1fb5da718392694d0076d677d6d0e364c79b0bc f1fb5da718392694d0076d677d6d0e364c79b0bc C100 file2/newf file3/newf + :100644 100644 7123f7f44e39be127c5eb701e5968176ee9d78b1 7123f7f44e39be127c5eb701e5968176ee9d78b1 C100 file2/oldf file3/oldf + EOF + subdir=$(git rev-parse refs/heads/branch^0:file2) && + cat >input <<-INPUT_END && + commit refs/heads/N4 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + copy by tree hash + COMMIT + + from refs/heads/branch^0 + M 040000 $subdir file3 + INPUT_END + git fast-import <input && + git diff-tree -C --find-copies-harder -r N4^ N4 >actual && + compare_diff_raw expect actual' + +test_expect_success \ + 'N: modify copied tree' \ + 'cat >expect <<-\EOF && + :100644 100644 fcf778cda181eaa1cbc9e9ce3a2e15ee9f9fe791 
fcf778cda181eaa1cbc9e9ce3a2e15ee9f9fe791 C100 newdir/interesting file3/file5 + :100755 100755 f1fb5da718392694d0076d677d6d0e364c79b0bc f1fb5da718392694d0076d677d6d0e364c79b0bc C100 file2/newf file3/newf + :100644 100644 7123f7f44e39be127c5eb701e5968176ee9d78b1 7123f7f44e39be127c5eb701e5968176ee9d78b1 C100 file2/oldf file3/oldf + EOF + subdir=$(git rev-parse refs/heads/branch^0:file2) && + cat >input <<-INPUT_END && + commit refs/heads/N5 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + copy by tree hash + COMMIT + + from refs/heads/branch^0 + M 040000 $subdir file3 + + commit refs/heads/N5 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + modify directory copy + COMMIT + + M 644 inline file3/file5 + data <<EOF + $file5_data + EOF + INPUT_END + git fast-import <input && + git diff-tree -C --find-copies-harder -r N5^^ N5 >actual && + compare_diff_raw expect actual' + ### ### series O ### diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh index d43f37ccaf..8c8e679468 100755 --- a/t/t9350-fast-export.sh +++ b/t/t9350-fast-export.sh @@ -355,6 +355,20 @@ test_expect_failure 'no exact-ref revisions included' ' ) ' +test_expect_success 'path limiting with import-marks does not lose unmodified files' ' + git checkout -b simple marks~2 && + git fast-export --export-marks=marks simple -- file > /dev/null && + echo more content >> file && + test_tick && + git commit -mnext file && + git fast-export --import-marks=marks simple -- file file0 | grep file0 +' + +test_expect_success 'full-tree re-shows unmodified files' ' + git checkout -f simple && + test $(git fast-export --full-tree simple | grep -c file0) -eq 3 +' + test_expect_success 'set-up a few more tags for tag export tests' ' git checkout -f master && HEAD_TREE=`git show -s --pretty=raw HEAD | grep tree | sed "s/tree //"` && @@ -376,4 +390,28 @@ test_expect_success 'tree_tag-obj' 'git fast-export tree_tag-obj' 
test_expect_success 'tag-obj_tag' 'git fast-export tag-obj_tag' test_expect_success 'tag-obj_tag-obj' 'git fast-export tag-obj_tag-obj' +test_expect_success SYMLINKS 'directory becomes symlink' ' + git init dirtosymlink && + git init result && + ( + cd dirtosymlink && + mkdir foo && + mkdir bar && + echo hello > foo/world && + echo hello > bar/world && + git add foo/world bar/world && + git commit -q -mone && + git rm -r foo && + ln -s bar foo && + git add foo && + git commit -q -mtwo + ) && + ( + cd dirtosymlink && + git fast-export master -- foo | + (cd ../result && git fast-import --quiet) + ) && + (cd result && git show master:foo) +' + test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index e5523dd690..3a3d4c4723 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -127,14 +127,13 @@ do -v|--v|--ve|--ver|--verb|--verbo|--verbos|--verbose) verbose=t; shift ;; -q|--q|--qu|--qui|--quie|--quiet) - quiet=t; shift ;; + # Ignore --quiet under a TAP::Harness. Saying how many tests + # passed without the ok/not ok details is always an error. + test -z "$HARNESS_ACTIVE" && quiet=t; shift ;; --with-dashes) with_dashes=t; shift ;; --no-color) color=; shift ;; - --no-python) - # noop now... - shift ;; --va|--val|--valg|--valgr|--valgri|--valgrin|--valgrind) valgrind=t; verbose=t; shift ;; --tee) @@ -257,6 +256,10 @@ q_to_cr () { tr Q '\015' } +q_to_tab () { + tr Q '\011' +} + append_cr () { sed -e 's/$/Q/' | tr Q '\015' } @@ -542,6 +545,38 @@ test_external_without_stderr () { fi } +# debugging-friendly alternatives to "test [-f|-d|-e]" +# The commands test the existence or non-existence of $1. $2 can be +# given to provide a more precise diagnosis. +test_path_is_file () { + if ! [ -f "$1" ] + then + echo "File $1 doesn't exist. $*" + false + fi +} + +test_path_is_dir () { + if ! [ -d "$1" ] + then + echo "Directory $1 doesn't exist. 
$*" + false + fi +} + +test_path_is_missing () { + if [ -e "$1" ] + then + echo "Path exists:" + ls -ld "$1" + if [ $# -ge 1 ]; then + echo "$*" + fi + false + fi +} + + # This is not among top-level (test_expect_success | test_expect_failure) # but is a prefix that can be used in the test script, like: # diff --git a/test-line-buffer.c b/test-line-buffer.c new file mode 100644 index 0000000000..c11bf7f967 --- /dev/null +++ b/test-line-buffer.c @@ -0,0 +1,46 @@ +/* + * test-line-buffer.c: code to exercise the svn importer's input helper + * + * Input format: + * number NL + * (number bytes) NL + * number NL + * ... + */ + +#include "git-compat-util.h" +#include "vcs-svn/line_buffer.h" + +static uint32_t strtouint32(const char *s) +{ + char *end; + uintmax_t n = strtoumax(s, &end, 10); + if (*s == '\0' || *end != '\0') + die("invalid count: %s", s); + return (uint32_t) n; +} + +int main(int argc, char *argv[]) +{ + char *s; + + if (argc != 1) + usage("test-line-buffer < input.txt"); + if (buffer_init(NULL)) + die_errno("open error"); + while ((s = buffer_read_line())) { + s = buffer_read_string(strtouint32(s)); + fputs(s, stdout); + fputc('\n', stdout); + buffer_skip_bytes(1); + if (!(s = buffer_read_line())) + break; + buffer_copy_bytes(strtouint32(s) + 1); + } + if (buffer_deinit()) + die("input error"); + if (ferror(stdout)) + die("output error"); + buffer_reset(); + return 0; +} diff --git a/test-obj-pool.c b/test-obj-pool.c new file mode 100644 index 0000000000..5018863ef5 --- /dev/null +++ b/test-obj-pool.c @@ -0,0 +1,116 @@ +/* + * test-obj-pool.c: code to exercise the svn importer's object pool + */ + +#include "cache.h" +#include "vcs-svn/obj_pool.h" + +enum pool { POOL_ONE, POOL_TWO }; +obj_pool_gen(one, int, 1) +obj_pool_gen(two, int, 4096) + +static uint32_t strtouint32(const char *s) +{ + char *end; + uintmax_t n = strtoumax(s, &end, 10); + if (*s == '\0' || (*end != '\n' && *end != '\0')) + die("invalid offset: %s", s); + return (uint32_t) n; +} + 
+static void handle_command(const char *command, enum pool pool, const char *arg) +{ + switch (*command) { + case 'a': + if (!prefixcmp(command, "alloc ")) { + uint32_t n = strtouint32(arg); + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_alloc(n) : two_alloc(n)); + return; + } + case 'c': + if (!prefixcmp(command, "commit ")) { + pool == POOL_ONE ? one_commit() : two_commit(); + return; + } + if (!prefixcmp(command, "committed ")) { + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_pool.committed : two_pool.committed); + return; + } + case 'f': + if (!prefixcmp(command, "free ")) { + uint32_t n = strtouint32(arg); + pool == POOL_ONE ? one_free(n) : two_free(n); + return; + } + case 'n': + if (!prefixcmp(command, "null ")) { + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_offset(NULL) : two_offset(NULL)); + return; + } + case 'o': + if (!prefixcmp(command, "offset ")) { + uint32_t n = strtouint32(arg); + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_offset(one_pointer(n)) : + two_offset(two_pointer(n))); + return; + } + case 'r': + if (!prefixcmp(command, "reset ")) { + pool == POOL_ONE ? one_reset() : two_reset(); + return; + } + case 's': + if (!prefixcmp(command, "set ")) { + uint32_t n = strtouint32(arg); + if (pool == POOL_ONE) + *one_pointer(n) = 1; + else + *two_pointer(n) = 1; + return; + } + case 't': + if (!prefixcmp(command, "test ")) { + uint32_t n = strtouint32(arg); + printf("%d\n", pool == POOL_ONE ? 
+ *one_pointer(n) : *two_pointer(n)); + return; + } + default: + die("unrecognized command: %s", command); + } +} + +static void handle_line(const char *line) +{ + const char *arg = strchr(line, ' '); + enum pool pool; + + if (arg && !prefixcmp(arg + 1, "one")) + pool = POOL_ONE; + else if (arg && !prefixcmp(arg + 1, "two")) + pool = POOL_TWO; + else + die("no pool specified: %s", line); + + handle_command(line, pool, arg + strlen("one ")); +} + +int main(int argc, char *argv[]) +{ + struct strbuf sb = STRBUF_INIT; + if (argc != 1) + usage("test-obj-str < script"); + + while (strbuf_getline(&sb, stdin, '\n') != EOF) + handle_line(sb.buf); + strbuf_release(&sb); + return 0; +} diff --git a/test-string-pool.c b/test-string-pool.c new file mode 100644 index 0000000000..c5782e6bce --- /dev/null +++ b/test-string-pool.c @@ -0,0 +1,31 @@ +/* + * test-string-pool.c: code to exercise the svn importer's string pool + */ + +#include "git-compat-util.h" +#include "vcs-svn/string_pool.h" + +int main(int argc, char *argv[]) +{ + const uint32_t unequal = pool_intern("does not equal"); + const uint32_t equal = pool_intern("equals"); + uint32_t buf[3]; + uint32_t n; + + if (argc != 2) + usage("test-string-pool <string>,<string>"); + + n = pool_tok_seq(3, buf, ",-", argv[1]); + if (n >= 3) + die("too many strings"); + if (n <= 1) + die("too few strings"); + + buf[2] = buf[1]; + buf[1] = (buf[0] == buf[2]) ? 
equal : unequal; + pool_print_seq(3, buf, ' ', stdout); + fputc('\n', stdout); + + pool_reset(); + return 0; +} diff --git a/test-svn-fe.c b/test-svn-fe.c new file mode 100644 index 0000000000..77cf78abcf --- /dev/null +++ b/test-svn-fe.c @@ -0,0 +1,17 @@ +/* + * test-svn-fe: Code to exercise the svn import lib + */ + +#include "git-compat-util.h" +#include "vcs-svn/svndump.h" + +int main(int argc, char *argv[]) +{ + if (argc != 2) + usage("test-svn-fe <file>"); + svndump_init(argv[1]); + svndump_read(NULL); + svndump_deinit(); + svndump_reset(); + return 0; +} diff --git a/test-treap.c b/test-treap.c new file mode 100644 index 0000000000..cdba5111e1 --- /dev/null +++ b/test-treap.c @@ -0,0 +1,65 @@ +/* + * test-treap.c: code to exercise the svn importer's treap structure + */ + +#include "cache.h" +#include "vcs-svn/obj_pool.h" +#include "vcs-svn/trp.h" + +struct int_node { + uintmax_t n; + struct trp_node children; +}; + +obj_pool_gen(node, struct int_node, 3) + +static int node_cmp(struct int_node *a, struct int_node *b) +{ + return (a->n > b->n) - (a->n < b->n); +} + +trp_gen(static, treap_, struct int_node, children, node, node_cmp) + +static void strtonode(struct int_node *item, const char *s) +{ + char *end; + item->n = strtoumax(s, &end, 10); + if (*s == '\0' || (*end != '\n' && *end != '\0')) + die("invalid integer: %s", s); +} + +int main(int argc, char *argv[]) +{ + struct strbuf sb = STRBUF_INIT; + struct trp_root root = { ~0 }; + uint32_t item; + + if (argc != 1) + usage("test-treap < ints"); + + while (strbuf_getline(&sb, stdin, '\n') != EOF) { + item = node_alloc(1); + strtonode(node_pointer(item), sb.buf); + treap_insert(&root, node_pointer(item)); + } + + item = node_offset(treap_first(&root)); + while (~item) { + uint32_t next; + struct int_node *tmp = node_pointer(node_alloc(1)); + + tmp->n = node_pointer(item)->n; + next = node_offset(treap_next(&root, node_pointer(item))); + + treap_remove(&root, node_pointer(item)); + item = 
node_offset(treap_nsearch(&root, tmp)); + + if (item != next && (!~item || node_pointer(item)->n != tmp->n)) + die("found %"PRIuMAX" in place of %"PRIuMAX"", + ~item ? node_pointer(item)->n : ~(uintmax_t) 0, + ~next ? node_pointer(next)->n : ~(uintmax_t) 0); + printf("%"PRIuMAX"\n", tmp->n); + } + node_reset(); + return 0; +} diff --git a/transport-helper.c b/transport-helper.c index 191fbf798a..acfc88e3f1 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -689,7 +689,7 @@ static int push_refs_with_export(struct transport *transport, struct child_process *helper, exporter; struct helper_data *data = transport->data; char *export_marks = NULL, *import_marks = NULL; - struct string_list revlist_args = { NULL, 0, 0 }; + struct string_list revlist_args = STRING_LIST_INIT_NODUP; struct strbuf buf = STRBUF_INIT; helper = get_helper(transport); diff --git a/tree-diff.c b/tree-diff.c index 1fb3e94614..cd659c6fe4 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -359,6 +359,7 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co diff_tree_release_paths(&diff_opts); /* Go through the new set of filepairing, and see if we find a more interesting one */ + opt->found_follow = 0; for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; @@ -376,6 +377,16 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co diff_tree_release_paths(opt); opt->paths[0] = xstrdup(p->one->path); diff_tree_setup_paths(opt->paths, opt); + + /* + * The caller expects us to return a set of vanilla + * filepairs to let a later call to diffcore_std() + * it makes to sort the renames out (among other + * things), but we already have found renames + * ourselves; signal diffcore_std() not to muck with + * rename information. 
+ */ + opt->found_follow = 1; break; } } @@ -412,7 +423,7 @@ int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const cha init_tree_desc(&t1, tree1, size1); init_tree_desc(&t2, tree2, size2); retval = diff_tree(&t1, &t2, base, opt); - if (DIFF_OPT_TST(opt, FOLLOW_RENAMES) && diff_might_be_rename()) { + if (!*base && DIFF_OPT_TST(opt, FOLLOW_RENAMES) && diff_might_be_rename()) { init_tree_desc(&t1, tree1, size1); init_tree_desc(&t2, tree2, size2); try_to_follow_renames(&t1, &t2, base, opt); diff --git a/tree-walk.c b/tree-walk.c index 67a9a0c5a5..a9bbf4e235 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -1,5 +1,6 @@ #include "cache.h" #include "tree-walk.h" +#include "unpack-trees.h" #include "tree.h" static const char *get_mode(const char *str, unsigned int *modep) @@ -310,6 +311,7 @@ static void free_extended_entry(struct tree_desc_x *t) int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) { int ret = 0; + int error = 0; struct name_entry *entry = xmalloc(n*sizeof(*entry)); int i; struct tree_desc_x *tx = xcalloc(n, sizeof(*tx)); @@ -377,8 +379,11 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) if (!mask) break; ret = info->fn(n, mask, dirmask, entry, info); - if (ret < 0) - break; + if (ret < 0) { + error = ret; + if (!info->show_all_errors) + break; + } mask &= ret; ret = 0; for (i = 0; i < n; i++) @@ -389,7 +394,7 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) for (i = 0; i < n; i++) free_extended_entry(tx + i); free(tx); - return ret; + return error; } static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode) diff --git a/tree-walk.h b/tree-walk.h index 42110a465f..7e3e0b5ad1 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -28,7 +28,10 @@ static inline int tree_entry_len(const char *name, const unsigned char *sha1) void update_tree_entry(struct tree_desc *); void init_tree_desc(struct tree_desc *desc, const void 
*buf, unsigned long size); -/* Helper function that does both of the above and returns true for success */ +/* + * Helper function that does both tree_entry_extract() and update_tree_entry() + * and returns true for success + */ int tree_entry(struct tree_desc *, struct name_entry *); void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1); @@ -45,6 +48,7 @@ struct traverse_info { unsigned long conflicts; traverse_callback_t fn; void *data; + int show_all_errors; }; int get_tree_entry(const unsigned char *, const char *, unsigned char *, unsigned *); diff --git a/unpack-trees.c b/unpack-trees.c index 8cf0da317d..803445aa7b 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -13,37 +13,90 @@ * Error messages expected by scripts out of plumbing commands such as * read-tree. Non-scripted Porcelain is not required to use these messages * and in fact are encouraged to reword them to better suit their particular - * situation better. See how "git checkout" replaces not_uptodate_file to - * explain why it does not allow switching between branches when you have - * local changes, for example. + * situation better. See how "git checkout" and "git merge" replaces + * them using setup_unpack_trees_porcelain(), for example. */ -static struct unpack_trees_error_msgs unpack_plumbing_errors = { - /* would_overwrite */ +const char *unpack_plumbing_errors[NB_UNPACK_TREES_ERROR_TYPES] = { + /* ERROR_WOULD_OVERWRITE */ "Entry '%s' would be overwritten by merge. Cannot merge.", - /* not_uptodate_file */ + /* ERROR_NOT_UPTODATE_FILE */ "Entry '%s' not uptodate. 
Cannot merge.", - /* not_uptodate_dir */ + /* ERROR_NOT_UPTODATE_DIR */ "Updating '%s' would lose untracked files in it", - /* would_lose_untracked */ - "Untracked working tree file '%s' would be %s by merge.", + /* ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN */ + "Untracked working tree file '%s' would be overwritten by merge.", - /* bind_overlap */ + /* ERROR_WOULD_LOSE_UNTRACKED_REMOVED */ + "Untracked working tree file '%s' would be removed by merge.", + + /* ERROR_BIND_OVERLAP */ "Entry '%s' overlaps with '%s'. Cannot bind.", - /* sparse_not_uptodate_file */ + /* ERROR_SPARSE_NOT_UPTODATE_FILE */ "Entry '%s' not uptodate. Cannot update sparse checkout.", - /* would_lose_orphaned */ - "Working tree file '%s' would be %s by sparse checkout update.", + /* ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN */ + "Working tree file '%s' would be overwritten by sparse checkout update.", + + /* ERROR_WOULD_LOSE_ORPHANED_REMOVED */ + "Working tree file '%s' would be removed by sparse checkout update.", }; -#define ERRORMSG(o,fld) \ - ( ((o) && (o)->msgs.fld) \ - ? ((o)->msgs.fld) \ - : (unpack_plumbing_errors.fld) ) +#define ERRORMSG(o,type) \ + ( ((o) && (o)->msgs[(type)]) \ + ? ((o)->msgs[(type)]) \ + : (unpack_plumbing_errors[(type)]) ) + +void setup_unpack_trees_porcelain(struct unpack_trees_options *opts, + const char *cmd) +{ + const char **msgs = opts->msgs; + const char *msg; + char *tmp; + const char *cmd2 = strcmp(cmd, "checkout") ? 
cmd : "switch branches"; + if (advice_commit_before_merge) + msg = "Your local changes to the following files would be overwritten by %s:\n%%s" + "Please, commit your changes or stash them before you can %s."; + else + msg = "Your local changes to the following files would be overwritten by %s:\n%%s"; + tmp = xmalloc(strlen(msg) + strlen(cmd) + strlen(cmd2) - 2); + sprintf(tmp, msg, cmd, cmd2); + msgs[ERROR_WOULD_OVERWRITE] = tmp; + msgs[ERROR_NOT_UPTODATE_FILE] = tmp; + + msgs[ERROR_NOT_UPTODATE_DIR] = + "Updating the following directories would lose untracked files in it:\n%s"; + + if (advice_commit_before_merge) + msg = "The following untracked working tree files would be %s by %s:\n%%s" + "Please move or remove them before you can %s."; + else + msg = "The following untracked working tree files would be %s by %s:\n%%s"; + tmp = xmalloc(strlen(msg) + strlen(cmd) + strlen("removed") + strlen(cmd2) - 4); + sprintf(tmp, msg, "removed", cmd, cmd2); + msgs[ERROR_WOULD_LOSE_UNTRACKED_REMOVED] = tmp; + tmp = xmalloc(strlen(msg) + strlen(cmd) + strlen("overwritten") + strlen(cmd2) - 4); + sprintf(tmp, msg, "overwritten", cmd, cmd2); + msgs[ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN] = tmp; + + /* + * Special case: ERROR_BIND_OVERLAP refers to a pair of paths, we + * cannot easily display it as a list. + */ + msgs[ERROR_BIND_OVERLAP] = "Entry '%s' overlaps with '%s'. 
Cannot bind."; + + msgs[ERROR_SPARSE_NOT_UPTODATE_FILE] = + "Cannot update sparse checkout: the following entries are not up-to-date:\n%s"; + msgs[ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN] = + "The following Working tree files would be overwritten by sparse checkout update:\n%s"; + msgs[ERROR_WOULD_LOSE_ORPHANED_REMOVED] = + "The following Working tree files would be removed by sparse checkout update:\n%s"; + + opts->show_all_errors = 1; +} static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce, unsigned int set, unsigned int clear) @@ -53,6 +106,9 @@ static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce, clear |= CE_HASHED | CE_UNHASHED; + if (set & CE_REMOVE) + set |= CE_WT_REMOVE; + memcpy(new, ce, size); new->next = NULL; new->ce_flags = (new->ce_flags & ~clear) | set; @@ -60,6 +116,67 @@ static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce, } /* + * add error messages on path <path> + * corresponding to the type <e> with the message <msg> + * indicating if it should be displayed in porcelain or not + */ +static int add_rejected_path(struct unpack_trees_options *o, + enum unpack_trees_error_types e, + const char *path) +{ + struct rejected_paths_list *newentry; + if (!o->show_all_errors) + return error(ERRORMSG(o, e), path); + + /* + * Otherwise, insert in a list for future display by + * display_error_msgs() + */ + newentry = xmalloc(sizeof(struct rejected_paths_list)); + newentry->path = (char *)path; + newentry->next = o->unpack_rejects[e]; + o->unpack_rejects[e] = newentry; + return -1; +} + +/* + * free all the structures allocated for the error <e> + */ +static void free_rejected_paths(struct unpack_trees_options *o, + enum unpack_trees_error_types e) +{ + while (o->unpack_rejects[e]) { + struct rejected_paths_list *del = o->unpack_rejects[e]; + o->unpack_rejects[e] = o->unpack_rejects[e]->next; + free(del); + } + free(o->unpack_rejects[e]); +} + +/* + * display all the error messages stored 
in a nice way + */ +static void display_error_msgs(struct unpack_trees_options *o) +{ + int e; + int something_displayed = 0; + for (e = 0; e < NB_UNPACK_TREES_ERROR_TYPES; e++) { + if (o->unpack_rejects[e]) { + struct rejected_paths_list *rp; + struct strbuf path = STRBUF_INIT; + something_displayed = 1; + for (rp = o->unpack_rejects[e]; rp; rp = rp->next) + strbuf_addf(&path, "\t%s\n", rp->path); + error(ERRORMSG(o, e), path.buf); + strbuf_release(&path); + free_rejected_paths(o, e); + } + } + if (something_displayed) + printf("Aborting\n"); +} + +/* * Unlink the last component and schedule the leading directories for * removal, such that empty directories get removed. */ @@ -84,7 +201,7 @@ static int check_updates(struct unpack_trees_options *o) if (o->update && o->verbose_update) { for (total = cnt = 0; cnt < index->cache_nr; cnt++) { struct cache_entry *ce = index->cache[cnt]; - if (ce->ce_flags & (CE_UPDATE | CE_REMOVE | CE_WT_REMOVE)) + if (ce->ce_flags & (CE_UPDATE | CE_WT_REMOVE)) total++; } @@ -104,12 +221,6 @@ static int check_updates(struct unpack_trees_options *o) unlink_entry(ce); continue; } - - if (ce->ce_flags & CE_REMOVE) { - display_progress(progress, ++cnt); - if (o->update) - unlink_entry(ce); - } } remove_marked_cache_entries(&o->result); remove_scheduled_dirs(); @@ -132,15 +243,12 @@ static int check_updates(struct unpack_trees_options *o) } static int verify_uptodate_sparse(struct cache_entry *ce, struct unpack_trees_options *o); -static int verify_absent_sparse(struct cache_entry *ce, const char *action, struct unpack_trees_options *o); +static int verify_absent_sparse(struct cache_entry *ce, enum unpack_trees_error_types, struct unpack_trees_options *o); static int will_have_skip_worktree(const struct cache_entry *ce, struct unpack_trees_options *o) { const char *basename; - if (ce_stage(ce)) - return 0; - basename = strrchr(ce->name, '/'); basename = basename ? 
basename+1 : ce->name; return excluded_from_list(ce->name, ce_namelen(ce), basename, NULL, o->el) <= 0; @@ -150,19 +258,36 @@ static int apply_sparse_checkout(struct cache_entry *ce, struct unpack_trees_opt { int was_skip_worktree = ce_skip_worktree(ce); - if (will_have_skip_worktree(ce, o)) + if (!ce_stage(ce) && will_have_skip_worktree(ce, o)) ce->ce_flags |= CE_SKIP_WORKTREE; else ce->ce_flags &= ~CE_SKIP_WORKTREE; /* - * We only care about files getting into the checkout area - * If merge strategies want to remove some, go ahead, this - * flag will be removed eventually in unpack_trees() if it's - * outside checkout area. + * if (!was_skip_worktree && !ce_skip_worktree()) { + * This is perfectly normal. Move on; + * } */ - if (ce->ce_flags & CE_REMOVE) - return 0; + + /* + * Merge strategies may set CE_UPDATE|CE_REMOVE outside checkout + * area as a result of ce_skip_worktree() shortcuts in + * verify_absent() and verify_uptodate(). + * Make sure they don't modify worktree if they are already + * outside checkout area + */ + if (was_skip_worktree && ce_skip_worktree(ce)) { + ce->ce_flags &= ~CE_UPDATE; + + /* + * By default, when CE_REMOVE is on, CE_WT_REMOVE is also + * on to get that file removed from both index and worktree. + * If that file is already outside worktree area, don't + * bother remove it. 
+ */ + if (ce->ce_flags & CE_REMOVE) + ce->ce_flags &= ~CE_WT_REMOVE; + } if (!was_skip_worktree && ce_skip_worktree(ce)) { /* @@ -175,7 +300,7 @@ static int apply_sparse_checkout(struct cache_entry *ce, struct unpack_trees_opt ce->ce_flags |= CE_WT_REMOVE; } if (was_skip_worktree && !ce_skip_worktree(ce)) { - if (verify_absent_sparse(ce, "overwritten", o)) + if (verify_absent_sparse(ce, ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, o)) return -1; ce->ce_flags |= CE_UPDATE; } @@ -329,6 +454,7 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long { int i, ret, bottom; struct tree_desc t[MAX_UNPACK_TREES]; + void *buf[MAX_UNPACK_TREES]; struct traverse_info newinfo; struct name_entry *p; @@ -346,12 +472,16 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long const unsigned char *sha1 = NULL; if (dirmask & 1) sha1 = names[i].sha1; - fill_tree_descriptor(t+i, sha1); + buf[i] = fill_tree_descriptor(t+i, sha1); } bottom = switch_cache_bottom(&newinfo); ret = traverse_trees(n, t, &newinfo); restore_cache_bottom(&newinfo, bottom); + + for (i = 0; i < n; i++) + free(buf[i]); + return ret; } @@ -750,6 +880,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options setup_traverse_info(&info, prefix); info.fn = unpack_callback; info.data = o; + info.show_all_errors = o->show_all_errors; if (o->prefix) { /* @@ -798,14 +929,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options ret = -1; goto done; } - /* - * Merge strategies may set CE_UPDATE|CE_REMOVE outside checkout - * area as a result of ce_skip_worktree() shortcuts in - * verify_absent() and verify_uptodate(). Clear them. 
- */ - if (ce_skip_worktree(ce)) - ce->ce_flags &= ~(CE_UPDATE | CE_REMOVE); - else + if (!ce_skip_worktree(ce)) empty_worktree = 0; } @@ -829,6 +953,8 @@ done: return ret; return_failed: + if (o->show_all_errors) + display_error_msgs(o); mark_all_ce_unused(o->src_index); ret = unpack_failed(o, NULL); goto done; @@ -838,7 +964,7 @@ return_failed: static int reject_merge(struct cache_entry *ce, struct unpack_trees_options *o) { - return error(ERRORMSG(o, would_overwrite), ce->name); + return add_rejected_path(o, ERROR_WOULD_OVERWRITE, ce->name); } static int same(struct cache_entry *a, struct cache_entry *b) @@ -860,7 +986,7 @@ static int same(struct cache_entry *a, struct cache_entry *b) */ static int verify_uptodate_1(struct cache_entry *ce, struct unpack_trees_options *o, - const char *error_msg) + enum unpack_trees_error_types error_type) { struct stat st; @@ -885,7 +1011,7 @@ static int verify_uptodate_1(struct cache_entry *ce, if (errno == ENOENT) return 0; return o->gently ? -1 : - error(error_msg, ce->name); + add_rejected_path(o, error_type, ce->name); } static int verify_uptodate(struct cache_entry *ce, @@ -893,13 +1019,13 @@ static int verify_uptodate(struct cache_entry *ce, { if (!o->skip_sparse_checkout && will_have_skip_worktree(ce, o)) return 0; - return verify_uptodate_1(ce, o, ERRORMSG(o, not_uptodate_file)); + return verify_uptodate_1(ce, o, ERROR_NOT_UPTODATE_FILE); } static int verify_uptodate_sparse(struct cache_entry *ce, struct unpack_trees_options *o) { - return verify_uptodate_1(ce, o, ERRORMSG(o, sparse_not_uptodate_file)); + return verify_uptodate_1(ce, o, ERROR_SPARSE_NOT_UPTODATE_FILE); } static void invalidate_ce_path(struct cache_entry *ce, struct unpack_trees_options *o) @@ -915,13 +1041,15 @@ static void invalidate_ce_path(struct cache_entry *ce, struct unpack_trees_optio * Currently, git does not checkout subprojects during a superproject * checkout, so it is not going to overwrite anything. 
*/ -static int verify_clean_submodule(struct cache_entry *ce, const char *action, +static int verify_clean_submodule(struct cache_entry *ce, + enum unpack_trees_error_types error_type, struct unpack_trees_options *o) { return 0; } -static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, +static int verify_clean_subdirectory(struct cache_entry *ce, + enum unpack_trees_error_types error_type, struct unpack_trees_options *o) { /* @@ -942,7 +1070,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, */ if (!hashcmp(sha1, ce->sha1)) return 0; - return verify_clean_submodule(ce, action, o); + return verify_clean_submodule(ce, error_type, o); } /* @@ -986,7 +1114,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, i = read_directory(&d, pathbuf, namelen+1, NULL); if (i) return o->gently ? -1 : - error(ERRORMSG(o, not_uptodate_dir), ce->name); + add_rejected_path(o, ERROR_NOT_UPTODATE_DIR, ce->name); free(pathbuf); return cnt; } @@ -1011,9 +1139,9 @@ static int icase_exists(struct unpack_trees_options *o, struct cache_entry *dst, * We do not want to remove or overwrite a working tree file that * is not tracked, unless it is ignored. */ -static int verify_absent_1(struct cache_entry *ce, const char *action, - struct unpack_trees_options *o, - const char *error_msg) +static int verify_absent_1(struct cache_entry *ce, + enum unpack_trees_error_types error_type, + struct unpack_trees_options *o) { struct stat st; @@ -1051,7 +1179,7 @@ static int verify_absent_1(struct cache_entry *ce, const char *action, * files that are in "foo/" we would lose * them. */ - if (verify_clean_subdirectory(ce, action, o) < 0) + if (verify_clean_subdirectory(ce, error_type, o) < 0) return -1; return 0; } @@ -1068,22 +1196,28 @@ static int verify_absent_1(struct cache_entry *ce, const char *action, } return o->gently ? 
-1 : - error(ERRORMSG(o, would_lose_untracked), ce->name, action); + add_rejected_path(o, error_type, ce->name); } return 0; } -static int verify_absent(struct cache_entry *ce, const char *action, +static int verify_absent(struct cache_entry *ce, + enum unpack_trees_error_types error_type, struct unpack_trees_options *o) { if (!o->skip_sparse_checkout && will_have_skip_worktree(ce, o)) return 0; - return verify_absent_1(ce, action, o, ERRORMSG(o, would_lose_untracked)); + return verify_absent_1(ce, error_type, o); } -static int verify_absent_sparse(struct cache_entry *ce, const char *action, +static int verify_absent_sparse(struct cache_entry *ce, + enum unpack_trees_error_types error_type, struct unpack_trees_options *o) { - return verify_absent_1(ce, action, o, ERRORMSG(o, would_lose_orphaned)); + enum unpack_trees_error_types orphaned_error = error_type; + if (orphaned_error == ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN) + orphaned_error = ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN; + + return verify_absent_1(ce, orphaned_error, o); } static int merged_entry(struct cache_entry *merge, struct cache_entry *old, @@ -1092,8 +1226,10 @@ static int merged_entry(struct cache_entry *merge, struct cache_entry *old, int update = CE_UPDATE; if (!old) { - if (verify_absent(merge, "overwritten", o)) + if (verify_absent(merge, ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, o)) return -1; + if (!o->skip_sparse_checkout && will_have_skip_worktree(merge, o)) + update |= CE_SKIP_WORKTREE; invalidate_ce_path(merge, o); } else if (!(old->ce_flags & CE_CONFLICTED)) { /* @@ -1130,7 +1266,7 @@ static int deleted_entry(struct cache_entry *ce, struct cache_entry *old, { /* Did it exist in the index? 
*/ if (!old) { - if (verify_absent(ce, "removed", o)) + if (verify_absent(ce, ERROR_WOULD_LOSE_UNTRACKED_REMOVED, o)) return -1; return 0; } @@ -1279,7 +1415,7 @@ int threeway_merge(struct cache_entry **stages, struct unpack_trees_options *o) if (index) return deleted_entry(index, index, o); if (ce && !head_deleted) { - if (verify_absent(ce, "removed", o)) + if (verify_absent(ce, ERROR_WOULD_LOSE_UNTRACKED_REMOVED, o)) return -1; } return 0; @@ -1412,7 +1548,7 @@ int bind_merge(struct cache_entry **src, o->merge_size); if (a && old) return o->gently ? -1 : - error(ERRORMSG(o, bind_overlap), a->name, old->name); + error(ERRORMSG(o, ERROR_BIND_OVERLAP), a->name, old->name); if (!a) return keep_entry(old, o); else diff --git a/unpack-trees.h b/unpack-trees.h index ef70eab390..7c0187d11a 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -9,14 +9,29 @@ struct exclude_list; typedef int (*merge_fn_t)(struct cache_entry **src, struct unpack_trees_options *options); -struct unpack_trees_error_msgs { - const char *would_overwrite; - const char *not_uptodate_file; - const char *not_uptodate_dir; - const char *would_lose_untracked; - const char *bind_overlap; - const char *sparse_not_uptodate_file; - const char *would_lose_orphaned; +enum unpack_trees_error_types { + ERROR_WOULD_OVERWRITE = 0, + ERROR_NOT_UPTODATE_FILE, + ERROR_NOT_UPTODATE_DIR, + ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, + ERROR_WOULD_LOSE_UNTRACKED_REMOVED, + ERROR_BIND_OVERLAP, + ERROR_SPARSE_NOT_UPTODATE_FILE, + ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN, + ERROR_WOULD_LOSE_ORPHANED_REMOVED, + NB_UNPACK_TREES_ERROR_TYPES +}; + +/* + * Sets the list of user-friendly error messages to be used by the + * command "cmd" (either merge or checkout), and show_all_errors to 1. 
+ */ +void setup_unpack_trees_porcelain(struct unpack_trees_options *opts, + const char *cmd); + +struct rejected_paths_list { + char *path; + struct rejected_paths_list *next; }; struct unpack_trees_options { @@ -33,12 +48,18 @@ struct unpack_trees_options { diff_index_cached, debug_unpack, skip_sparse_checkout, - gently; + gently, + show_all_errors; const char *prefix; int cache_bottom; struct dir_struct *dir; merge_fn_t fn; - struct unpack_trees_error_msgs msgs; + const char *msgs[NB_UNPACK_TREES_ERROR_TYPES]; + /* + * Store error messages in an array, each case + * corresponding to an error message type + */ + struct rejected_paths_list *unpack_rejects[NB_UNPACK_TREES_ERROR_TYPES]; int head_idx; int merge_size; diff --git a/upload-pack.c b/upload-pack.c index dc464d78b3..92f9530c65 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -105,7 +105,7 @@ static void show_edge(struct commit *commit) fprintf(pack_pipe, "-%s\n", sha1_to_hex(commit->object.sha1)); } -static int do_rev_list(int in, int out, void *create_full_pack) +static int do_rev_list(int in, int out, void *user_data) { int i; struct rev_info revs; @@ -118,23 +118,18 @@ static int do_rev_list(int in, int out, void *create_full_pack) if (use_thin_pack) revs.edge_hint = 1; - if (create_full_pack) { - const char *args[] = {"rev-list", "--all", NULL}; - setup_revisions(2, args, &revs, NULL); - } else { - for (i = 0; i < want_obj.nr; i++) { - struct object *o = want_obj.objects[i].item; - /* why??? */ - o->flags &= ~UNINTERESTING; - add_pending_object(&revs, o, NULL); - } - for (i = 0; i < have_obj.nr; i++) { - struct object *o = have_obj.objects[i].item; - o->flags |= UNINTERESTING; - add_pending_object(&revs, o, NULL); - } - setup_revisions(0, NULL, &revs, NULL); + for (i = 0; i < want_obj.nr; i++) { + struct object *o = want_obj.objects[i].item; + /* why??? 
*/ + o->flags &= ~UNINTERESTING; + add_pending_object(&revs, o, NULL); + } + for (i = 0; i < have_obj.nr; i++) { + struct object *o = have_obj.objects[i].item; + o->flags |= UNINTERESTING; + add_pending_object(&revs, o, NULL); } + setup_revisions(0, NULL, &revs, NULL); if (prepare_revision_walk(&revs)) die("revision walk setup failed"); mark_edges_uninteresting(revs.commits, &revs, show_edge); @@ -487,7 +482,7 @@ static int get_common_commits(void) static void receive_needs(void) { - struct object_array shallows = {0, 0, NULL}; + struct object_array shallows = OBJECT_ARRAY_INIT; static char line[1000]; int len, depth = 0; @@ -554,7 +549,8 @@ static void receive_needs(void) */ o = lookup_object(sha1_buf); if (!o || !(o->flags & OUR_REF)) - die("git upload-pack: not our ref %s", line+5); + die("git upload-pack: not our ref %s", + sha1_to_hex(sha1_buf)); if (!(o->flags & WANTED)) { o->flags |= WANTED; add_object_array(o, NULL, &want_obj); @@ -67,7 +67,8 @@ static int url_decode_char(const char *q) return val; } -static char *url_decode_internal(const char **query, const char *stop_at, struct strbuf *out) +static char *url_decode_internal(const char **query, const char *stop_at, + struct strbuf *out, int decode_plus) { const char *q = *query; @@ -90,7 +91,7 @@ static char *url_decode_internal(const char **query, const char *stop_at, struct } } - if (c == '+') + if (decode_plus && c == '+') strbuf_addch(out, ' '); else strbuf_addch(out, c); @@ -110,17 +111,17 @@ char *url_decode(const char *url) strbuf_add(&out, url, colon - url); url = colon; } - return url_decode_internal(&url, NULL, &out); + return url_decode_internal(&url, NULL, &out, 0); } char *url_decode_parameter_name(const char **query) { struct strbuf out = STRBUF_INIT; - return url_decode_internal(query, "&=", &out); + return url_decode_internal(query, "&=", &out, 1); } char *url_decode_parameter_value(const char **query) { struct strbuf out = STRBUF_INIT; - return url_decode_internal(query, "&", &out); + 
return url_decode_internal(query, "&", &out, 1); } diff --git a/userdiff.c b/userdiff.c index c49cc1b67e..e5522159b3 100644 --- a/userdiff.c +++ b/userdiff.c @@ -82,6 +82,22 @@ PATTERNS("cpp", "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("csharp", + /* Keywords */ + "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" + /* Methods and constructors */ + "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" + /* Properties */ + "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + /* Type definitions */ + "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct)[ \t]+.*)$\n" + /* Namespace */ + "^[ \t]*(namespace[ \t]+.*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), { "default", NULL, -1, { NULL, 0 } }, }; #undef PATTERNS diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE new file mode 100644 index 0000000000..0a5e3c43a0 --- /dev/null +++ b/vcs-svn/LICENSE @@ -0,0 +1,33 @@ +Copyright (C) 2010 David Barr <david.barr@cordelta.com>. +All rights reserved. + +Copyright (C) 2008 Jason Evans <jasone@canonware.com>. +All rights reserved. + +Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH, +Frankfurt/Main, Germany +and others, see http://svn2cc.sarovar.org + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. 
Redistributions of source code must retain the above copyright + notice(s), this list of conditions and the following disclaimer + unmodified other than the allowable addition of one or more + copyright notices. +2. Redistributions in binary form must reproduce the above copyright + notice(s), this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c new file mode 100644 index 0000000000..256a0522b2 --- /dev/null +++ b/vcs-svn/fast_export.c @@ -0,0 +1,75 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "repo_tree.h" +#include "string_pool.h" + +#define MAX_GITSVN_LINE_LEN 4096 + +static uint32_t first_commit_done; + +void fast_export_delete(uint32_t depth, uint32_t *path) +{ + putchar('D'); + putchar(' '); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark) +{ + /* Mode must be 100644, 100755, 120000, or 160000. 
*/ + printf("M %06o :%d ", mode, mark); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +static char gitsvnline[MAX_GITSVN_LINE_LEN]; +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, + unsigned long timestamp) +{ + if (!log) + log = ""; + if (~uuid && ~url) { + snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n", + pool_fetch(url), revision, pool_fetch(uuid)); + } else { + *gitsvnline = '\0'; + } + printf("commit refs/heads/master\n"); + printf("committer %s <%s@%s> %ld +0000\n", + ~author ? pool_fetch(author) : "nobody", + ~author ? pool_fetch(author) : "nobody", + ~uuid ? pool_fetch(uuid) : "local", timestamp); + printf("data %"PRIu32"\n%s%s\n", + (uint32_t) (strlen(log) + strlen(gitsvnline)), + log, gitsvnline); + if (!first_commit_done) { + if (revision > 1) + printf("from refs/heads/master^0\n"); + first_commit_done = 1; + } + repo_diff(revision - 1, revision); + fputc('\n', stdout); + + printf("progress Imported commit %d.\n\n", revision); +} + +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +{ + if (mode == REPO_MODE_LNK) { + /* svn symlink blobs start with "link " */ + buffer_skip_bytes(5); + len -= 5; + } + printf("blob\nmark :%d\ndata %d\n", mark, len); + buffer_copy_bytes(len); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h new file mode 100644 index 0000000000..2aaaea53d5 --- /dev/null +++ b/vcs-svn/fast_export.h @@ -0,0 +1,11 @@ +#ifndef FAST_EXPORT_H_ +#define FAST_EXPORT_H_ + +void fast_export_delete(uint32_t depth, uint32_t *path); +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark); +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, unsigned long timestamp); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); + +#endif diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c new file mode 100644 
index 0000000000..1543567093 --- /dev/null +++ b/vcs-svn/line_buffer.c @@ -0,0 +1,97 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "line_buffer.h" +#include "obj_pool.h" + +#define LINE_BUFFER_LEN 10000 +#define COPY_BUFFER_LEN 4096 + +/* Create memory pool for char sequence of known length */ +obj_pool_gen(blob, char, 4096) + +static char line_buffer[LINE_BUFFER_LEN]; +static char byte_buffer[COPY_BUFFER_LEN]; +static FILE *infile; + +int buffer_init(const char *filename) +{ + infile = filename ? fopen(filename, "r") : stdin; + if (!infile) + return -1; + return 0; +} + +int buffer_deinit(void) +{ + int err; + if (infile == stdin) + return ferror(infile); + err = ferror(infile); + err |= fclose(infile); + return err; +} + +/* Read a line without trailing newline. */ +char *buffer_read_line(void) +{ + char *end; + if (!fgets(line_buffer, sizeof(line_buffer), infile)) + /* Error or data exhausted. */ + return NULL; + end = line_buffer + strlen(line_buffer); + if (end[-1] == '\n') + end[-1] = '\0'; + else if (feof(infile)) + ; /* No newline at end of file. That's fine. */ + else + /* + * Line was too long. + * There is probably a saner way to deal with this, + * but for now let's return an error. + */ + return NULL; + return line_buffer; +} + +char *buffer_read_string(uint32_t len) +{ + char *s; + blob_free(blob_pool.size); + s = blob_pointer(blob_alloc(len + 1)); + s[fread(s, 1, len, infile)] = '\0'; + return ferror(infile) ? NULL : s; +} + +void buffer_copy_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? 
len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + fwrite(byte_buffer, 1, in, stdout); + if (ferror(stdout)) { + buffer_skip_bytes(len); + return; + } + } +} + +void buffer_skip_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + } +} + +void buffer_reset(void) +{ + blob_reset(); +} diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h new file mode 100644 index 0000000000..9c78ae11a1 --- /dev/null +++ b/vcs-svn/line_buffer.h @@ -0,0 +1,12 @@ +#ifndef LINE_BUFFER_H_ +#define LINE_BUFFER_H_ + +int buffer_init(const char *filename); +int buffer_deinit(void); +char *buffer_read_line(void); +char *buffer_read_string(uint32_t len); +void buffer_copy_bytes(uint32_t len); +void buffer_skip_bytes(uint32_t len); +void buffer_reset(void); + +#endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt new file mode 100644 index 0000000000..8906fb1f50 --- /dev/null +++ b/vcs-svn/line_buffer.txt @@ -0,0 +1,58 @@ +line_buffer API +=============== + +The line_buffer library provides a convenient interface for +mostly-line-oriented input. + +Each line is not permitted to exceed 10000 bytes. The provided +functions are not thread-safe or async-signal-safe, and like +`fgets()`, they generally do not function correctly if interrupted +by a signal without SA_RESTART set. + +Calling sequence +---------------- + +The calling program: + + - specifies a file to read with `buffer_init` + - processes input with `buffer_read_line`, `buffer_read_string`, + `buffer_skip_bytes`, and `buffer_copy_bytes` + - closes the file with `buffer_deinit`, perhaps to start over and + read another file. + +Before exiting, the caller can use `buffer_reset` to deallocate +resources for the benefit of profiling tools. + +Functions +--------- + +`buffer_init`:: + Open the named file for input. 
If filename is NULL, + start reading from stdin. On failure, returns -1 (with + errno indicating the nature of the failure). + +`buffer_deinit`:: + Stop reading from the current file (closing it unless + it was stdin). Returns nonzero if `fclose` fails or + the error indicator was set. + +`buffer_read_line`:: + Read a line and strip off the trailing newline. + On failure or end of file, returns NULL. + +`buffer_read_string`:: + Read `len` characters of input or up to the end of the + file, whichever comes first. Returns NULL on error. + Returns whatever characters were read (possibly "") + for end of file. + +`buffer_copy_bytes`:: + Read `len` bytes of input and dump them to the standard output + stream. Returns early for error or end of file. + +`buffer_skip_bytes`:: + Discards `len` bytes from the input stream (stopping early + if necessary because of an error or eof). + +`buffer_reset`:: + Deallocates non-static buffers. diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h new file mode 100644 index 0000000000..deb6eb8135 --- /dev/null +++ b/vcs-svn/obj_pool.h @@ -0,0 +1,61 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#ifndef OBJ_POOL_H_ +#define OBJ_POOL_H_ + +#include "git-compat-util.h" + +#define MAYBE_UNUSED __attribute__((__unused__)) + +#define obj_pool_gen(pre, obj_t, initial_capacity) \ +static struct { \ + uint32_t committed; \ + uint32_t size; \ + uint32_t capacity; \ + obj_t *base; \ +} pre##_pool = {0, 0, 0, NULL}; \ +static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \ +{ \ + uint32_t offset; \ + if (pre##_pool.size + count > pre##_pool.capacity) { \ + while (pre##_pool.size + count > pre##_pool.capacity) \ + if (pre##_pool.capacity) \ + pre##_pool.capacity *= 2; \ + else \ + pre##_pool.capacity = initial_capacity; \ + pre##_pool.base = realloc(pre##_pool.base, \ + pre##_pool.capacity * sizeof(obj_t)); \ + } \ + offset = pre##_pool.size; \ + pre##_pool.size += count; \ + return offset; \ +} \ +static MAYBE_UNUSED void pre##_free(uint32_t count) \ +{ \ + pre##_pool.size -= count; \ +} \ +static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \ +{ \ + return obj == NULL ? ~0 : obj - pre##_pool.base; \ +} \ +static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \ +{ \ + return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \ +} \ +static MAYBE_UNUSED void pre##_commit(void) \ +{ \ + pre##_pool.committed = pre##_pool.size; \ +} \ +static MAYBE_UNUSED void pre##_reset(void) \ +{ \ + free(pre##_pool.base); \ + pre##_pool.base = NULL; \ + pre##_pool.size = 0; \ + pre##_pool.capacity = 0; \ + pre##_pool.committed = 0; \ +} + +#endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c new file mode 100644 index 0000000000..e94d91d129 --- /dev/null +++ b/vcs-svn/repo_tree.c @@ -0,0 +1,329 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#include "git-compat-util.h" + +#include "string_pool.h" +#include "repo_tree.h" +#include "obj_pool.h" +#include "fast_export.h" + +#include "trp.h" + +struct repo_dirent { + uint32_t name_offset; + struct trp_node children; + uint32_t mode; + uint32_t content_offset; +}; + +struct repo_dir { + struct trp_root entries; +}; + +struct repo_commit { + uint32_t root_dir_offset; +}; + +/* Memory pools for commit, dir and dirent */ +obj_pool_gen(commit, struct repo_commit, 4096) +obj_pool_gen(dir, struct repo_dir, 4096) +obj_pool_gen(dent, struct repo_dirent, 4096) + +static uint32_t active_commit; +static uint32_t mark; + +static int repo_dirent_name_cmp(const void *a, const void *b); + +/* Treap for directory entries */ +trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp); + +uint32_t next_blob_mark(void) +{ + return mark++; +} + +static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) +{ + return dir_pointer(commit->root_dir_offset); +} + +static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) +{ + return dent_first(&dir->entries); +} + +static int repo_dirent_name_cmp(const void *a, const void *b) +{ + const struct repo_dirent *dent1 = a, *dent2 = b; + uint32_t a_offset = dent1->name_offset; + uint32_t b_offset = dent2->name_offset; + return (a_offset > b_offset) - (a_offset < b_offset); +} + +static int repo_dirent_is_dir(struct repo_dirent *dent) +{ + return dent != NULL && dent->mode == REPO_MODE_DIR; +} + +static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent) +{ + if (!repo_dirent_is_dir(dent)) + return NULL; + return dir_pointer(dent->content_offset); +} + +static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) +{ + uint32_t orig_o, new_o; + orig_o = dir_offset(orig_dir); + if (orig_o >= dir_pool.committed) + return orig_dir; + new_o = dir_alloc(1); + orig_dir = dir_pointer(orig_o); + *dir_pointer(new_o) = *orig_dir; + return dir_pointer(new_o); +} + +static struct 
repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) +{ + uint32_t name = 0; + struct repo_dirent *key = dent_pointer(dent_alloc(1)); + struct repo_dir *dir = NULL; + struct repo_dirent *dent = NULL; + dir = repo_commit_root_dir(commit_pointer(revision)); + while (~(name = *path++)) { + key->name_offset = name; + dent = dent_search(&dir->entries, key); + if (dent == NULL || !repo_dirent_is_dir(dent)) + break; + dir = repo_dir_from_dirent(dent); + } + dent_free(1); + return dent; +} + +static void repo_write_dirent(uint32_t *path, uint32_t mode, + uint32_t content_offset, uint32_t del) +{ + uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; + struct repo_dir *dir; + struct repo_dirent *key; + struct repo_dirent *dent = NULL; + revision = active_commit; + dir = repo_commit_root_dir(commit_pointer(revision)); + dir = repo_clone_dir(dir); + commit_pointer(revision)->root_dir_offset = dir_offset(dir); + while (~(name = *path++)) { + parent_dir_o = dir_offset(dir); + + key = dent_pointer(dent_alloc(1)); + key->name_offset = name; + + dent = dent_search(&dir->entries, key); + if (dent == NULL) + dent = key; + else + dent_free(1); + + if (dent == key) { + dent->mode = REPO_MODE_DIR; + dent->content_offset = 0; + dent_insert(&dir->entries, dent); + } + + if (dent_offset(dent) < dent_pool.committed) { + dir_o = repo_dirent_is_dir(dent) ? 
+ dent->content_offset : ~0; + dent_remove(&dir->entries, dent); + dent = dent_pointer(dent_alloc(1)); + dent->name_offset = name; + dent->mode = REPO_MODE_DIR; + dent->content_offset = dir_o; + dent_insert(&dir->entries, dent); + } + + dir = repo_dir_from_dirent(dent); + dir = repo_clone_dir(dir); + dent->content_offset = dir_offset(dir); + } + if (dent == NULL) + return; + dent->mode = mode; + dent->content_offset = content_offset; + if (del && ~parent_dir_o) + dent_remove(&dir_pointer(parent_dir_o)->entries, dent); +} + +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) +{ + uint32_t mode = 0, content_offset = 0; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(revision, src); + if (src_dent != NULL) { + mode = src_dent->mode; + content_offset = src_dent->content_offset; + repo_write_dirent(dst, mode, content_offset, 0); + } + return mode; +} + +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + repo_write_dirent(path, mode, blob_mark, 0); +} + +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) +{ + uint32_t mode = 0; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL) { + mode = src_dent->mode; + repo_write_dirent(path, mode, blob_mark, 0); + } + return mode; +} + +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL && blob_mark == 0) + blob_mark = src_dent->content_offset; + repo_write_dirent(path, mode, blob_mark, 0); +} + +void repo_delete(uint32_t *path) +{ + repo_write_dirent(path, 0, 0, 1); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); + +static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent) +{ + if (repo_dirent_is_dir(dent)) + repo_git_add_r(depth, path, repo_dir_from_dirent(dent)); + else + fast_export_modify(depth, path, + dent->mode, 
dent->content_offset); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) +{ + struct repo_dirent *de = repo_first_dirent(dir); + while (de) { + path[depth] = de->name_offset; + repo_git_add(depth + 1, path, de); + de = dent_next(&dir->entries, de); + } +} + +static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, + struct repo_dir *dir2) +{ + struct repo_dirent *de1, *de2; + de1 = repo_first_dirent(dir1); + de2 = repo_first_dirent(dir2); + + while (de1 && de2) { + if (de1->name_offset < de2->name_offset) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dent_next(&dir1->entries, de1); + continue; + } + if (de1->name_offset > de2->name_offset) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dent_next(&dir2->entries, de2); + continue; + } + path[depth] = de1->name_offset; + + if (de1->mode == de2->mode && + de1->content_offset == de2->content_offset) { + ; /* No change. 
*/ + } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { + repo_diff_r(depth + 1, path, + repo_dir_from_dirent(de1), + repo_dir_from_dirent(de2)); + } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) { + repo_git_add(depth + 1, path, de2); + } else { + fast_export_delete(depth + 1, path); + repo_git_add(depth + 1, path, de2); + } + de1 = dent_next(&dir1->entries, de1); + de2 = dent_next(&dir2->entries, de2); + } + while (de1) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dent_next(&dir1->entries, de1); + } + while (de2) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dent_next(&dir2->entries, de2); + } +} + +static uint32_t path_stack[REPO_MAX_PATH_DEPTH]; + +void repo_diff(uint32_t r1, uint32_t r2) +{ + repo_diff_r(0, + path_stack, + repo_commit_root_dir(commit_pointer(r1)), + repo_commit_root_dir(commit_pointer(r2))); +} + +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, unsigned long timestamp) +{ + fast_export_commit(revision, author, log, uuid, url, timestamp); + dent_commit(); + dir_commit(); + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +static void mark_init(void) +{ + uint32_t i; + mark = 0; + for (i = 0; i < dent_pool.size; i++) + if (!repo_dirent_is_dir(dent_pointer(i)) && + dent_pointer(i)->content_offset > mark) + mark = dent_pointer(i)->content_offset; + mark++; +} + +void repo_init(void) +{ + mark_init(); + if (commit_pool.size == 0) { + /* Create empty tree for commit 0. */ + commit_alloc(1); + commit_pointer(0)->root_dir_offset = dir_alloc(1); + dir_pointer(0)->entries.trp_root = ~0; + dir_commit(); + } + /* Preallocate next commit, ready for changes. 
*/ + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +void repo_reset(void) +{ + pool_reset(); + commit_reset(); + dir_reset(); + dent_reset(); +} diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h new file mode 100644 index 0000000000..5476175922 --- /dev/null +++ b/vcs-svn/repo_tree.h @@ -0,0 +1,26 @@ +#ifndef REPO_TREE_H_ +#define REPO_TREE_H_ + +#include "git-compat-util.h" + +#define REPO_MODE_DIR 0040000 +#define REPO_MODE_BLB 0100644 +#define REPO_MODE_EXE 0100755 +#define REPO_MODE_LNK 0120000 + +#define REPO_MAX_PATH_LEN 4096 +#define REPO_MAX_PATH_DEPTH 1000 + +uint32_t next_blob_mark(void); +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); +void repo_delete(uint32_t *path); +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, long unsigned timestamp); +void repo_diff(uint32_t r1, uint32_t r2); +void repo_init(void); +void repo_reset(void); + +#endif diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c new file mode 100644 index 0000000000..f5b1da836e --- /dev/null +++ b/vcs-svn/string_pool.c @@ -0,0 +1,102 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "trp.h" +#include "obj_pool.h" +#include "string_pool.h" + +static struct trp_root tree = { ~0 }; + +struct node { + uint32_t offset; + struct trp_node children; +}; + +/* Two memory pools: one for struct node, and another for strings */ +obj_pool_gen(node, struct node, 4096) +obj_pool_gen(string, char, 4096) + +static char *node_value(struct node *node) +{ + return node ? 
string_pointer(node->offset) : NULL; +} + +static int node_cmp(struct node *a, struct node *b) +{ + return strcmp(node_value(a), node_value(b)); +} + +/* Build a Treap from the node structure (a trp_node w/ offset) */ +trp_gen(static, tree_, struct node, children, node, node_cmp); + +const char *pool_fetch(uint32_t entry) +{ + return node_value(node_pointer(entry)); +} + +uint32_t pool_intern(const char *key) +{ + /* Canonicalize key */ + struct node *match = NULL, *node; + uint32_t key_len; + if (key == NULL) + return ~0; + key_len = strlen(key) + 1; + node = node_pointer(node_alloc(1)); + node->offset = string_alloc(key_len); + strcpy(node_value(node), key); + match = tree_search(&tree, node); + if (!match) { + tree_insert(&tree, node); + } else { + node_free(1); + string_free(key_len); + node = match; + } + return node_offset(node); +} + +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) +{ + char *token = strtok_r(str, delim, saveptr); + return token ? pool_intern(token) : ~0; +} + +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) +{ + uint32_t i; + for (i = 0; i < len && ~seq[i]; i++) { + fputs(pool_fetch(seq[i]), stream); + if (i < len - 1 && ~seq[i + 1]) + fputc(delim, stream); + } +} + +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) +{ + char *context = NULL; + uint32_t token = ~0; + uint32_t length; + + if (sz == 0) + return ~0; + if (str) + token = pool_tok_r(str, delim, &context); + for (length = 0; length < sz; length++) { + seq[length] = token; + if (token == ~0) + return length; + token = pool_tok_r(NULL, delim, &context); + } + seq[sz - 1] = ~0; + return sz; +} + +void pool_reset(void) +{ + node_reset(); + string_reset(); +} diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h new file mode 100644 index 0000000000..222fb66e68 --- /dev/null +++ b/vcs-svn/string_pool.h @@ -0,0 +1,11 @@ +#ifndef STRING_POOL_H_ +#define STRING_POOL_H_ + +uint32_t pool_intern(const char 
*key); +const char *pool_fetch(uint32_t entry); +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); +void pool_reset(void); + +#endif diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt new file mode 100644 index 0000000000..1b41f15628 --- /dev/null +++ b/vcs-svn/string_pool.txt @@ -0,0 +1,43 @@ +string_pool API +=============== + +The string_pool API provides facilities for replacing strings +with integer keys that can be more easily compared and stored. +The facilities are designed so that one could teach Git without +too much trouble to store the information needed for these keys to +remain valid over multiple executions. + +Functions +--------- + +pool_intern:: + Include a string in the string pool and get its key. + If that string is already in the pool, retrieves its + existing key. + +pool_fetch:: + Retrieve the string associated to a given key. + +pool_tok_r:: + Extract the key of the next token from a string. + Interface mimics strtok_r. + +pool_print_seq:: + Print a sequence of strings named by key to a file, using the + specified delimiter to separate them. + + If NULL (key ~0) appears in the sequence, the sequence ends + early. + +pool_tok_seq:: + Split a string into tokens, storing the keys of segments + into a caller-provided array. + + Unless sz is 0, the array will always be ~0-terminated. + If there is not enough room for all the tokens, the + array holds as many tokens as fit in the entries before + the terminating ~0. Return value is the index after the + last token, or sz if the tokens did not fit. + +pool_reset:: + Deallocate storage for the string pool. diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c new file mode 100644 index 0000000000..630eeb53b7 --- /dev/null +++ b/vcs-svn/svndump.c @@ -0,0 +1,302 @@ +/* + * Parse and rearrange a svnadmin dump. 
+ * Create the dump with: + * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "cache.h" +#include "repo_tree.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "obj_pool.h" +#include "string_pool.h" + +#define NODEACT_REPLACE 4 +#define NODEACT_DELETE 3 +#define NODEACT_ADD 2 +#define NODEACT_CHANGE 1 +#define NODEACT_UNKNOWN 0 + +#define DUMP_CTX 0 +#define REV_CTX 1 +#define NODE_CTX 2 + +#define LENGTH_UNKNOWN (~0) +#define DATE_RFC2822_LEN 31 + +/* Create memory pool for log messages */ +obj_pool_gen(log, char, 4096) + +static char* log_copy(uint32_t length, char *log) +{ + char *buffer; + log_free(log_pool.size); + buffer = log_pointer(log_alloc(length)); + strncpy(buffer, log, length); + return buffer; +} + +static struct { + uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; + uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; +} node_ctx; + +static struct { + uint32_t revision, author; + unsigned long timestamp; + char *log; +} rev_ctx; + +static struct { + uint32_t uuid, url; +} dump_ctx; + +static struct { + uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, + revision_number, node_path, node_kind, node_action, + node_copyfrom_path, node_copyfrom_rev, text_content_length, + prop_content_length, content_length; +} keys; + +static void reset_node_ctx(char *fname) +{ + node_ctx.type = 0; + node_ctx.action = NODEACT_UNKNOWN; + node_ctx.propLength = LENGTH_UNKNOWN; + node_ctx.textLength = LENGTH_UNKNOWN; + node_ctx.src[0] = ~0; + node_ctx.srcRev = 0; + node_ctx.srcMode = 0; + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); + node_ctx.mark = 0; +} + +static void reset_rev_ctx(uint32_t revision) +{ + rev_ctx.revision = revision; + rev_ctx.timestamp = 0; + rev_ctx.log = NULL; + rev_ctx.author = ~0; +} + +static void reset_dump_ctx(uint32_t url) +{ + dump_ctx.url 
= url; + dump_ctx.uuid = ~0; +} + +static void init_keys(void) +{ + keys.svn_log = pool_intern("svn:log"); + keys.svn_author = pool_intern("svn:author"); + keys.svn_date = pool_intern("svn:date"); + keys.svn_executable = pool_intern("svn:executable"); + keys.svn_special = pool_intern("svn:special"); + keys.uuid = pool_intern("UUID"); + keys.revision_number = pool_intern("Revision-number"); + keys.node_path = pool_intern("Node-path"); + keys.node_kind = pool_intern("Node-kind"); + keys.node_action = pool_intern("Node-action"); + keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); + keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); + keys.text_content_length = pool_intern("Text-content-length"); + keys.prop_content_length = pool_intern("Prop-content-length"); + keys.content_length = pool_intern("Content-length"); +} + +static void read_props(void) +{ + uint32_t len; + uint32_t key = ~0; + char *val = NULL; + char *t; + while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { + if (!strncmp(t, "K ", 2)) { + len = atoi(&t[2]); + key = pool_intern(buffer_read_string(len)); + buffer_read_line(); + } else if (!strncmp(t, "V ", 2)) { + len = atoi(&t[2]); + val = buffer_read_string(len); + if (key == keys.svn_log) { + /* Value length excludes terminating nul. 
*/ + rev_ctx.log = log_copy(len + 1, val); + } else if (key == keys.svn_author) { + rev_ctx.author = pool_intern(val); + } else if (key == keys.svn_date) { + if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) + fprintf(stderr, "Invalid timestamp: %s\n", val); + } else if (key == keys.svn_executable) { + node_ctx.type = REPO_MODE_EXE; + } else if (key == keys.svn_special) { + node_ctx.type = REPO_MODE_LNK; + } + key = ~0; + buffer_read_line(); + } + } +} + +static void handle_node(void) +{ + if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) + read_props(); + + if (node_ctx.srcRev) + node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + + if (node_ctx.textLength != LENGTH_UNKNOWN && + node_ctx.type != REPO_MODE_DIR) + node_ctx.mark = next_blob_mark(); + + if (node_ctx.action == NODEACT_DELETE) { + repo_delete(node_ctx.dst); + } else if (node_ctx.action == NODEACT_CHANGE || + node_ctx.action == NODEACT_REPLACE) { + if (node_ctx.action == NODEACT_REPLACE && + node_ctx.type == REPO_MODE_DIR) + repo_replace(node_ctx.dst, node_ctx.mark); + else if (node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + } else if (node_ctx.action == NODEACT_ADD) { + if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || + node_ctx.textLength != LENGTH_UNKNOWN) + repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + } + + if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) + node_ctx.type = node_ctx.srcMode; + + if (node_ctx.mark) + fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); + else if 
(node_ctx.textLength != LENGTH_UNKNOWN) + buffer_skip_bytes(node_ctx.textLength); +} + +static void handle_revision(void) +{ + if (rev_ctx.revision) + repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, + dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); +} + +void svndump_read(const char *url) +{ + char *val; + char *t; + uint32_t active_ctx = DUMP_CTX; + uint32_t len; + uint32_t key; + + reset_dump_ctx(pool_intern(url)); + while ((t = buffer_read_line())) { + val = strstr(t, ": "); + if (!val) + continue; + *val++ = '\0'; + *val++ = '\0'; + key = pool_intern(t); + + if (key == keys.uuid) { + dump_ctx.uuid = pool_intern(val); + } else if (key == keys.revision_number) { + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); + active_ctx = REV_CTX; + reset_rev_ctx(atoi(val)); + } else if (key == keys.node_path) { + if (active_ctx == NODE_CTX) + handle_node(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + } else if (key == keys.node_kind) { + if (!strcmp(val, "dir")) + node_ctx.type = REPO_MODE_DIR; + else if (!strcmp(val, "file")) + node_ctx.type = REPO_MODE_BLB; + else + fprintf(stderr, "Unknown node-kind: %s\n", val); + } else if (key == keys.node_action) { + if (!strcmp(val, "delete")) { + node_ctx.action = NODEACT_DELETE; + } else if (!strcmp(val, "add")) { + node_ctx.action = NODEACT_ADD; + } else if (!strcmp(val, "change")) { + node_ctx.action = NODEACT_CHANGE; + } else if (!strcmp(val, "replace")) { + node_ctx.action = NODEACT_REPLACE; + } else { + fprintf(stderr, "Unknown node-action: %s\n", val); + node_ctx.action = NODEACT_UNKNOWN; + } + } else if (key == keys.node_copyfrom_path) { + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); + } else if (key == keys.node_copyfrom_rev) { + node_ctx.srcRev = atoi(val); + } else if (key == keys.text_content_length) { + node_ctx.textLength = atoi(val); + } else if (key == keys.prop_content_length) { + node_ctx.propLength = atoi(val); + } else if (key == 
keys.content_length) { + len = atoi(val); + buffer_read_line(); + if (active_ctx == REV_CTX) { + read_props(); + } else if (active_ctx == NODE_CTX) { + handle_node(); + active_ctx = REV_CTX; + } else { + fprintf(stderr, "Unexpected content length header: %d\n", len); + buffer_skip_bytes(len); + } + } + } + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); +} + +void svndump_init(const char *filename) +{ + buffer_init(filename); + repo_init(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + init_keys(); +} + +void svndump_deinit(void) +{ + log_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + if (buffer_deinit()) + fprintf(stderr, "Input error\n"); + if (ferror(stdout)) + fprintf(stderr, "Output error\n"); +} + +void svndump_reset(void) +{ + log_reset(); + buffer_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); +} diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h new file mode 100644 index 0000000000..93c412f14a --- /dev/null +++ b/vcs-svn/svndump.h @@ -0,0 +1,9 @@ +#ifndef SVNDUMP_H_ +#define SVNDUMP_H_ + +void svndump_init(const char *filename); +void svndump_read(const char *url); +void svndump_deinit(void); +void svndump_reset(void); + +#endif diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h new file mode 100644 index 0000000000..ee35c688a0 --- /dev/null +++ b/vcs-svn/trp.h @@ -0,0 +1,236 @@ +/* + * C macro implementation of treaps. + * + * Usage: + * #include <stdint.h> + * #include "trp.h" + * trp_gen(...) + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef TRP_H_ +#define TRP_H_ + +#define MAYBE_UNUSED __attribute__((__unused__)) + +/* Node structure. */ +struct trp_node { + uint32_t trpn_left; + uint32_t trpn_right; +}; + +/* Root structure. */ +struct trp_root { + uint32_t trp_root; +}; + +/* Pointer/Offset conversion. 
*/ +#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) +#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) +#define trpn_modify(a_base, a_offset) \ + do { \ + if ((a_offset) < a_base##_pool.committed) { \ + uint32_t old_offset = (a_offset);\ + (a_offset) = a_base##_alloc(1); \ + *trpn_pointer(a_base, a_offset) = \ + *trpn_pointer(a_base, old_offset); \ + } \ + } while (0) + +/* Left accessors. */ +#define trp_left_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_left) +#define trp_left_set(a_base, a_field, a_node, a_left) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_left_get(a_base, a_field, a_node) = (a_left); \ + } while (0) + +/* Right accessors. */ +#define trp_right_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_right) +#define trp_right_set(a_base, a_field, a_node, a_right) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_right_get(a_base, a_field, a_node) = (a_right); \ + } while (0) + +/* + * Fibonacci hash function. + * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2). + * See Knuth §6.4: volume 3, 3rd ed, p518. + */ +#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node)) + +/* Priority accessors. */ +#define trp_prio_get(a_node) trpn_hash(a_node) + +/* Node initializer. */ +#define trp_node_new(a_base, a_field, a_node) \ + do { \ + trp_left_set(a_base, a_field, (a_node), ~0); \ + trp_right_set(a_base, a_field, (a_node), ~0); \ + } while (0) + +/* Internal utility macros. 
*/ +#define trpn_first(a_base, a_field, a_root, r_node) \ + do { \ + (r_node) = (a_root); \ + if ((r_node) == ~0) \ + return NULL; \ + while (~trp_left_get(a_base, a_field, (r_node))) \ + (r_node) = trp_left_get(a_base, a_field, (r_node)); \ + } while (0) + +#define trpn_rotate_left(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_right_get(a_base, a_field, (a_node)); \ + trp_right_set(a_base, a_field, (a_node), \ + trp_left_get(a_base, a_field, (r_node))); \ + trp_left_set(a_base, a_field, (r_node), (a_node)); \ + } while (0) + +#define trpn_rotate_right(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_left_get(a_base, a_field, (a_node)); \ + trp_left_set(a_base, a_field, (a_node), \ + trp_right_get(a_base, a_field, (r_node))); \ + trp_right_set(a_base, a_field, (r_node), (a_node)); \ + } while (0) + +#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ +a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ +{ \ + uint32_t ret; \ + trpn_first(a_base, a_field, treap->trp_root, ret); \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t ret; \ + uint32_t offset = trpn_offset(a_base, node); \ + if (~trp_right_get(a_base, a_field, offset)) { \ + trpn_first(a_base, a_field, \ + trp_right_get(a_base, a_field, offset), ret); \ + } else { \ + uint32_t tnode = treap->trp_root; \ + ret = ~0; \ + while (1) { \ + int cmp = (a_cmp)(trpn_pointer(a_base, offset), \ + trpn_pointer(a_base, tnode)); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = trp_left_get(a_base, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = trp_right_get(a_base, a_field, tnode); \ + } else { \ + break; \ + } \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ 
+ if (cmp < 0) { \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ + if (cmp < 0) { \ + if (!~trp_left_get(a_base, a_field, ret)) \ + break; \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ +{ \ + if (cur_node == ~0) { \ + return ins_node; \ + } else { \ + uint32_t ret; \ + int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp < 0) { \ + uint32_t left = a_pre##insert_recurse( \ + trp_left_get(a_base, a_field, cur_node), ins_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + if (trp_prio_get(left) < trp_prio_get(cur_node)) \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } else { \ + uint32_t right = a_pre##insert_recurse( \ + trp_right_get(a_base, a_field, cur_node), ins_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + if (trp_prio_get(right) < trp_prio_get(cur_node)) \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } \ + return ret; \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t offset = trpn_offset(a_base, node); \ + trp_node_new(a_base, a_field, offset); \ + treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ +{ \ + int cmp = a_cmp(trpn_pointer(a_base, rem_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp == 0) { \ + 
uint32_t ret; \ + uint32_t left = trp_left_get(a_base, a_field, cur_node); \ + uint32_t right = trp_right_get(a_base, a_field, cur_node); \ + if (left == ~0) { \ + if (right == ~0) \ + return ~0; \ + } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + right = a_pre##remove_recurse(cur_node, rem_node); \ + trp_right_set(a_base, a_field, ret, right); \ + return ret; \ + } \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + left = a_pre##remove_recurse(cur_node, rem_node); \ + trp_left_set(a_base, a_field, ret, left); \ + return ret; \ + } else if (cmp < 0) { \ + uint32_t left = a_pre##remove_recurse( \ + trp_left_get(a_base, a_field, cur_node), rem_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + return cur_node; \ + } else { \ + uint32_t right = a_pre##remove_recurse( \ + trp_right_get(a_base, a_field, cur_node), rem_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + return cur_node; \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ +{ \ + treap->trp_root = a_pre##remove_recurse(treap->trp_root, \ + trpn_offset(a_base, node)); \ +} \ + +#endif diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt new file mode 100644 index 0000000000..eb4c191875 --- /dev/null +++ b/vcs-svn/trp.txt @@ -0,0 +1,103 @@ +Motivation +========== + +Treaps provide a memory-efficient binary search tree structure. +Insertion/deletion/search are about as fast in the average +case as red-black trees and the chances of worst-case behavior are +vanishingly small, thanks to (pseudo-)randomness. The bad worst-case +behavior is a small price to pay, given that treaps are much simpler +to implement. + +API +=== + +The trp API generates a data structure and functions to handle a +large growing set of objects stored in a pool. + +The caller: + +. Specifies parameters for the generated functions with the + trp_gen(static, foo_, ...) macro. + +. 
Allocates a `struct trp_root` variable and sets it to {~0}. + +. Adds new nodes to the set using `foo_insert`. + +. Can find a specific item in the set using `foo_search`. + +. Can iterate over items in the set using `foo_first` and `foo_next`. + +. Can remove an item from the set using `foo_remove`. + +Example: + +---- +struct ex_node { + const char *s; + struct trp_node ex_link; +}; +static struct trp_root ex_base = {~0}; +obj_pool_gen(ex, struct ex_node, 4096); +trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) +struct ex_node *item; + +item = ex_pointer(ex_alloc(1)); +item->s = "hello"; +ex_insert(&ex_base, item); +item = ex_pointer(ex_alloc(1)); +item->s = "goodbye"; +ex_insert(&ex_base, item); +for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) + printf("%s\n", item->s); +---- + +Functions +--------- + +trp_gen(attr, foo_, node_type, link_field, pool, cmp):: + + Generate a type-specific treap implementation. ++ +. The storage class for generated functions will be 'attr' (e.g., `static`). +. Generated function names are prefixed with 'foo_' (e.g., `treap_`). +. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). + This type must be a struct with at least one `struct trp_node` field + to point to its children. +. The field used to access child nodes will be 'link_field'. +. All treap nodes must lie in the 'pool' object pool. +. Treap nodes must be totally ordered by the 'cmp' relation, with the + following prototype: ++ +int (*cmp)(node_type \*a, node_type \*b) ++ +and returning a value less than, equal to, or greater than zero +according to the result of comparison. + +void foo_insert(struct trp_root *treap, node_type \*node):: + + Insert node into treap. If inserted multiple times, + a node will appear in the treap multiple times. + +void foo_remove(struct trp_root *treap, node_type \*node):: + + Remove node from treap. Caller must ensure node is + present in treap before using this function. 
+ +node_type *foo_search(struct trp_root \*treap, node_type \*key):: + + Search for a node that matches key. If no match is found, + result is NULL. + +node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: + + Like `foo_search`, but if the key is missing return what + would be key's successor, were key in treap (NULL if no + successor). + +node_type *foo_first(struct trp_root \*treap):: + + Find the first item from the treap, in sorted order. + +node_type *foo_next(struct trp_root \*treap, node_type \*node):: + + Find the next item. @@ -40,7 +40,8 @@ void *xmalloc(size_t size) if (!ret && !size) ret = malloc(1); if (!ret) - die("Out of memory, malloc failed"); + die("Out of memory, malloc failed (tried to allocate %lu bytes)", + (unsigned long)size); } #ifdef XMALLOC_POISON memset(ret, 0xA5, size); diff --git a/wt-status.c b/wt-status.c index 2f9e33c8fa..54b6b03b9c 100644 --- a/wt-status.c +++ b/wt-status.c @@ -313,8 +313,10 @@ static void wt_status_collect_changes_worktree(struct wt_status *s) DIFF_OPT_SET(&rev.diffopt, DIRTY_SUBMODULES); if (!s->show_untracked_files) DIFF_OPT_SET(&rev.diffopt, IGNORE_UNTRACKED_IN_SUBMODULES); - if (s->ignore_submodule_arg) + if (s->ignore_submodule_arg) { + DIFF_OPT_SET(&rev.diffopt, OVERRIDE_SUBMODULE_CONFIG); handle_ignore_submodules_arg(&rev.diffopt, s->ignore_submodule_arg); + } rev.diffopt.format_callback = wt_status_collect_changed_cb; rev.diffopt.format_callback_data = s; rev.prune_data = s->pathspec; @@ -331,8 +333,10 @@ static void wt_status_collect_changes_index(struct wt_status *s) opt.def = s->is_initial ? EMPTY_TREE_SHA1_HEX : s->reference; setup_revisions(0, NULL, &rev, &opt); - if (s->ignore_submodule_arg) + if (s->ignore_submodule_arg) { + DIFF_OPT_SET(&rev.diffopt, OVERRIDE_SUBMODULE_CONFIG); handle_ignore_submodules_arg(&rev.diffopt, s->ignore_submodule_arg); + } rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK; rev.diffopt.format_callback = wt_status_collect_updated_cb; |