diff options
313 files changed, 20945 insertions, 7851 deletions
diff --git a/.gitignore b/.gitignore index fcdd822d8a..20560b810b 100644 --- a/.gitignore +++ b/.gitignore @@ -166,12 +166,17 @@ /test-dump-cache-tree /test-genrandom /test-index-version +/test-line-buffer /test-match-trees +/test-obj-pool /test-parse-options /test-path-utils /test-run-command /test-sha1 /test-sigchain +/test-string-pool +/test-svn-fe +/test-treap /common-cmds.h *.tar.gz *.dsc @@ -181,6 +186,12 @@ *.[aos] *.py[co] .depend/ +*.gcda +*.gcno +*.gcov +/coverage-untested-functions +/cover_db/ +/cover_db_html/ *+ /config.mak /autom4te.cache diff --git a/Documentation/Makefile b/Documentation/Makefile index a4c4063e50..e117bc4315 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -279,7 +279,7 @@ $(patsubst %,%.html,$(API_DOCS) technical/api-index): %.html : %.txt XSLT = docbook.xsl XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css -user-manual.html: user-manual.xml +user-manual.html: user-manual.xml $(XSLT) $(QUIET_XSLTPROC)$(RM) $@+ $@ && \ xsltproc $(XSLTOPTS) -o $@+ $(XSLT) $< && \ mv $@+ $@ diff --git a/Documentation/RelNotes-1.7.3.txt b/Documentation/RelNotes-1.7.3.txt new file mode 100644 index 0000000000..3512bbb238 --- /dev/null +++ b/Documentation/RelNotes-1.7.3.txt @@ -0,0 +1,73 @@ +Git v1.7.3 Release Notes (draft) +================================ + +Updates since v1.7.2 +-------------------- + + * git-gui got various updates and a new maintainer, Pat Thoyts. + + * Gitweb allows its configuration to change per each request; it used to + read the configuration once upon startup. + + * When git finds a corrupt object, it now reports the file that contains + it. + + * "git checkout -B <it>" is a shorter way to say "git branch -f <it>" + followed by "git checkout <it>". + + * When "git checkout" or "git merge" refuse to proceed in order to + protect local modification to your working tree, they used to stop + after showing just one path that might be lost. They now show all, + in a format that is easier to read. 
+ + * "git clean" learned "-e" ("--exclude") option. + + * Hunk headers produced for C# files by "git diff" and friends show more + relevant context than before. + + * diff.ignoresubmodules configuration variable can be used to squelch the + differences in submodules reported when running commands (e.g. "diff", + "status", etc.) at the superproject level. + + * http.useragent configuration can be used to lie who you are to your + restrictive firewall. + + * "git rebase --strategy <s>" learned "-X" option to pass extra options + that are understood by the chosen merge strategy. + + * "git rebase -i" learned "exec" that you can insert into the insn sheet + to run a command between its steps. + + * "git rebase" between branches that have many binary changes that do + not conflict should be faster. + + * "git rebase -i" peeks into rebase.autosquash configuration and acts as + if you gave --autosquash from the command line. + + +Also contains various documentation updates. + + +Fixes since v1.7.2 +------------------ + +All of the fixes in v1.7.2.X maintenance series are included in this +release, unless otherwise noted. + + * "git merge -s recursive" (which is the default) did not handle cases + where a directory becomes a file (or vice versa) very well. + + * "git fetch" and friends were accidentally broken for url with "+" in + its path, e.g. "git://git.gnome.org/gtk+". + +--- +exec >/var/tmp/1 +echo O=$(git describe master) +O=v1.7.2.2-268-g7e42332 +O=v1.7.2 +git shortlog --no-merges $O..master ^maint +exit 0 + +What did we want to do with... 
+ +1e3d411 (Enable custom schemes for column colors in the graph API, 2010-07-13) diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index eb53e0636e..ece3c77482 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -7,17 +7,16 @@ Checklist (and a short version for the impatient): before committing - do not check in commented out code or unneeded files - the first line of the commit message should be a short - description and should skip the full stop + description (50 characters is the soft limit, see DISCUSSION + in git-commit(1)), and should skip the full stop - the body should provide a meaningful commit message, which: - uses the imperative, present tense: "change", not "changed" or "changes". - includes motivation for the change, and contrasts its implementation with previous behaviour - - if you want your work included in git.git, add a - "Signed-off-by: Your Name <you@example.com>" line to the - commit message (or just use the option "-s" when - committing) to confirm that you agree to the Developer's - Certificate of Origin + - add a "Signed-off-by: Your Name <you@example.com>" line to the + commit message (or just use the option "-s" when committing) + to confirm that you agree to the Developer's Certificate of Origin - make sure that you have tests for the bug you are fixing - make sure that the test suite passes after your commit diff --git a/Documentation/asciidoc.conf b/Documentation/asciidoc.conf index 87a90f2c3f..aea8627be0 100644 --- a/Documentation/asciidoc.conf +++ b/Documentation/asciidoc.conf @@ -16,8 +16,11 @@ plus=+ caret=^ startsb=[ endsb=] +backslash=\ tilde=~ +apostrophe=' backtick=` +litdd=-- ifdef::backend-docbook[] [linkgit-inlinemacro] diff --git a/Documentation/config.txt b/Documentation/config.txt index dc4e83b103..cda6721013 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -826,6 +826,12 @@ diff.renames:: will enable basic rename detection. 
If set to "copies" or "copy", it will detect copies, as well. +diff.ignoreSubmodules:: + Sets the default value of --ignore-submodules. Note that this + affects only 'git diff' Porcelain, and not lower level 'diff' + commands such as 'git diff-files'. 'git checkout' also honors + this setting when reporting uncommitted changes. + diff.suppressBlankEmpty:: A boolean to inhibit the standard behavior of printing a space before each empty output line. Defaults to false. @@ -1243,6 +1249,15 @@ http.noEPSV:: support EPSV mode. Can be overridden by the 'GIT_CURL_FTP_NO_EPSV' environment variable. Default is false (curl will use EPSV). +http.useragent:: + The HTTP USER_AGENT string presented to an HTTP server. The default + value represents the version of the client git such as git/1.7.1. + This option allows you to override this value to a more common value + such as Mozilla/4.0. This may be necessary, for instance, if + connecting through a firewall that restricts HTTP connections to a set + of common USER_AGENT strings (but not including those like git/1.7.1). + Can be overridden by the 'GIT_HTTP_USER_AGENT' environment variable. + i18n.commitEncoding:: Character encoding the commit messages are stored in; git itself does not care per se, but this information is necessary e.g. when @@ -1291,10 +1306,11 @@ interactive.singlekey:: ignored if portable keystroke input is not available. log.date:: - Set default date-time mode for the log command. Setting log.date - value is similar to using 'git log'\'s --date option. The value is one of the - following alternatives: {relative,local,default,iso,rfc,short}. - See linkgit:git-log[1]. + Set the default date-time mode for the 'log' command. + Setting a value for log.date is similar to using 'git log''s + `\--date` option. Possible values are `relative`, `local`, + `default`, `iso`, `rfc`, and `short`; see linkgit:git-log[1] + for details. 
log.decorate:: Print out the ref names of any commits that are shown by the log @@ -1535,6 +1551,9 @@ rebase.stat:: Whether to show a diffstat of what changed upstream since the last rebase. False by default. +rebase.autosquash:: + If set to true enable '--autosquash' option by default. + receive.autogc:: By default, git-receive-pack will run "git-gc --auto" after receiving data from git-push and updating refs. You can stop @@ -1751,6 +1770,19 @@ submodule.<name>.update:: URL and other values found in the `.gitmodules` file. See linkgit:git-submodule[1] and linkgit:gitmodules[5] for details. +submodule.<name>.ignore:: + Defines under what circumstances "git status" and the diff family show + a submodule as modified. When set to "all", it will never be considered + modified, "dirty" will ignore all changes to the submodules work tree and + takes only differences between the HEAD of the submodule and the commit + recorded in the superproject into account. "untracked" will additionally + let submodules with modified tracked files in their work tree show up. + Using "none" (the default when this option is not set) also shows + submodules that have untracked files in their work tree as changed. + This setting overrides any setting made in .gitmodules for this submodule, + both settings can be overridden on the command line by using the + "--ignore-submodules" option. + tar.umask:: This variable can be used to restrict the permission bits of tar archive entries. The default is 0002, which turns off the diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index eecedaab6e..4656a97e60 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -355,7 +355,11 @@ endif::git-format-patch[] --ignore-submodules[=<when>]:: Ignore changes to submodules in the diff generation. <when> can be - either "untracked", "dirty" or "all", which is the default. 
When + either "none", "untracked", "dirty" or "all", which is the default. + Using "none" will consider the submodule modified when it either contains + untracked or modified files or its HEAD differs from the commit recorded + in the superproject and can be used to override any settings of the + 'ignore' option in linkgit:git-config[1] or linkgit:gitmodules[5]. When "untracked" is used submodules are not considered dirty when they only contain untracked content (but they are still scanned for modified content). Using "dirty" ignores all changes to the work tree of submodules, diff --git a/Documentation/docbook.xsl b/Documentation/docbook.xsl index 9a6912c641..da8b05b922 100644 --- a/Documentation/docbook.xsl +++ b/Documentation/docbook.xsl @@ -1,5 +1,8 @@ <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version='1.0'> <xsl:import href="http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl"/> - <xsl:output method="html" encoding="UTF-8" indent="no" /> + <xsl:output method="html" + encoding="UTF-8" indent="no" + doctype-public="-//W3C//DTD HTML 4.01//EN" + doctype-system="http://www.w3.org/TR/html4/strict.dtd" /> </xsl:stylesheet> diff --git a/Documentation/git-add.txt b/Documentation/git-add.txt index e22a62f065..73378b2bef 100644 --- a/Documentation/git-add.txt +++ b/Documentation/git-add.txt @@ -157,14 +157,14 @@ those in info/exclude. See linkgit:gitrepository-layout[5]. EXAMPLES -------- -* Adds content from all `\*.txt` files under `Documentation` directory +* Adds content from all `*.txt` files under `Documentation` directory and its subdirectories: + ------------ $ git add Documentation/\*.txt ------------ + -Note that the asterisk `\*` is quoted from the shell in this +Note that the asterisk `*` is quoted from the shell in this example; this lets the command include the files from subdirectories of `Documentation/` directory. 
@@ -220,7 +220,7 @@ binary so line count cannot be shown) and there is no difference between indexed copy and the working tree version (if the working tree version were also different, 'binary' would have been shown in place of 'nothing'). The -other file, git-add--interactive.perl, has 403 lines added +other file, git-add{litdd}interactive.perl, has 403 lines added and 35 lines deleted if you commit what is in the index, but working tree file has further modifications (one addition and one deletion). diff --git a/Documentation/git-archimport.txt b/Documentation/git-archimport.txt index 4d4325f222..4f358c8d6c 100644 --- a/Documentation/git-archimport.txt +++ b/Documentation/git-archimport.txt @@ -44,7 +44,7 @@ archives that it imports, it is also possible to specify git branch names manually. To do so, write a git branch name after each <archive/branch> parameter, separated by a colon. This way, you can shorten the Arch branch names and convert Arch jargon to git jargon, for example mapping a -"PROJECT--devo--VERSION" branch to "master". +"PROJECT{litdd}devo{litdd}VERSION" branch to "master". Associating multiple Arch branches to one git branch is possible; the result will make the most sense only if no commits are made to the first @@ -85,8 +85,8 @@ OPTIONS -o:: Use this for compatibility with old-style branch names used by earlier versions of 'git archimport'. Old-style branch names - were category--branch, whereas new-style branch names are - archive,category--branch--version. In both cases, names given + were category{litdd}branch, whereas new-style branch names are + archive,category{litdd}branch{litdd}version. In both cases, names given on the command-line will override the automatically-generated ones. 
diff --git a/Documentation/git-bisect-lk2009.txt b/Documentation/git-bisect-lk2009.txt index efbe3790bb..8a2ba37904 100644 --- a/Documentation/git-bisect-lk2009.txt +++ b/Documentation/git-bisect-lk2009.txt @@ -873,7 +873,7 @@ c * N * T + b * M * log2(M) tests where c is the number of rounds of test (so a small constant) and b is the ratio of bug per commit (hopefully a small constant too). -So of course it's much better as it's O(N \* T) vs O(N \* T \* M) if +So of course it's much better as it's O(N * T) vs O(N * T * M) if you would test everything after each commit. This means that test suites are good to prevent some bugs from being diff --git a/Documentation/git-bundle.txt b/Documentation/git-bundle.txt index a5ed8fb05b..38e59afb34 100644 --- a/Documentation/git-bundle.txt +++ b/Documentation/git-bundle.txt @@ -9,7 +9,7 @@ git-bundle - Move objects and refs by archive SYNOPSIS -------- [verse] -'git bundle' create <file> <git-rev-list args> +'git bundle' create <file> <git-rev-list-args> 'git bundle' verify <file> 'git bundle' list-heads <file> [refname...] 'git bundle' unbundle <file> [refname...] @@ -34,57 +34,58 @@ OPTIONS ------- create <file>:: - Used to create a bundle named 'file'. This requires the - 'git rev-list' arguments to define the bundle contents. + Used to create a bundle named 'file'. This requires the + 'git-rev-list-args' arguments to define the bundle contents. verify <file>:: - Used to check that a bundle file is valid and will apply - cleanly to the current repository. This includes checks on the - bundle format itself as well as checking that the prerequisite - commits exist and are fully linked in the current repository. - 'git bundle' prints a list of missing commits, if any, and exits - with a non-zero status. + Used to check that a bundle file is valid and will apply + cleanly to the current repository. 
This includes checks on the + bundle format itself as well as checking that the prerequisite + commits exist and are fully linked in the current repository. + 'git bundle' prints a list of missing commits, if any, and exits + with a non-zero status. list-heads <file>:: - Lists the references defined in the bundle. If followed by a - list of references, only references matching those given are - printed out. + Lists the references defined in the bundle. If followed by a + list of references, only references matching those given are + printed out. unbundle <file>:: - Passes the objects in the bundle to 'git index-pack' - for storage in the repository, then prints the names of all - defined references. If a list of references is given, only - references matching those in the list are printed. This command is - really plumbing, intended to be called only by 'git fetch'. - -[git-rev-list-args...]:: - A list of arguments, acceptable to 'git rev-parse' and - 'git rev-list', that specifies the specific objects and references - to transport. For example, `master\~10..master` causes the - current master reference to be packaged along with all objects - added since its 10th ancestor commit. There is no explicit - limit to the number of references and objects that may be - packaged. + Passes the objects in the bundle to 'git index-pack' + for storage in the repository, then prints the names of all + defined references. If a list of references is given, only + references matching those in the list are printed. This command is + really plumbing, intended to be called only by 'git fetch'. + +<git-rev-list-args>:: + A list of arguments, acceptable to 'git rev-parse' and + 'git rev-list' (and containing a named ref, see SPECIFYING REFERENCES + below), that specifies the specific objects and references + to transport. For example, `master{tilde}10..master` causes the + current master reference to be packaged along with all objects + added since its 10th ancestor commit. 
There is no explicit + limit to the number of references and objects that may be + packaged. [refname...]:: - A list of references used to limit the references reported as - available. This is principally of use to 'git fetch', which - expects to receive only those references asked for and not - necessarily everything in the pack (in this case, 'git bundle' acts - like 'git fetch-pack'). + A list of references used to limit the references reported as + available. This is principally of use to 'git fetch', which + expects to receive only those references asked for and not + necessarily everything in the pack (in this case, 'git bundle' acts + like 'git fetch-pack'). SPECIFYING REFERENCES --------------------- 'git bundle' will only package references that are shown by 'git show-ref': this includes heads, tags, and remote heads. References -such as `master\~1` cannot be packaged, but are perfectly suitable for +such as `master{tilde}1` cannot be packaged, but are perfectly suitable for defining the basis. More than one reference may be packaged, and more than one basis can be specified. The objects packaged are those not contained in the union of the given bases. Each basis can be -specified explicitly (e.g. `^master\~10`), or implicitly (e.g. -`master\~10..master`, `--since=10.days.ago master`). +specified explicitly (e.g. `^master{tilde}10`), or implicitly (e.g. +`master{tilde}10..master`, `--since=10.days.ago master`). It is very important that the basis used be held by the destination. It is okay to err on the side of caution, causing the bundle file @@ -154,7 +155,7 @@ machineB$ git pull If you know up to what commit the intended recipient repository should have the necessary objects, you can use that knowledge to specify the basis, giving a cut-off point to limit the revisions and objects that go -in the resulting bundle. The previous example used lastR2bundle tag +in the resulting bundle. 
The previous example used the lastR2bundle tag for this purpose, but you can use any other options that you would give to the linkgit:git-log[1] command. Here are more examples: @@ -194,7 +195,7 @@ references when fetching: $ git fetch mybundle master:localRef ---------------- -You can also see what references it offers. +You can also see what references it offers: ---------------- $ git ls-remote mybundle diff --git a/Documentation/git-checkout-index.txt b/Documentation/git-checkout-index.txt index d6aa6e14eb..62f9ab24c9 100644 --- a/Documentation/git-checkout-index.txt +++ b/Documentation/git-checkout-index.txt @@ -13,7 +13,7 @@ SYNOPSIS [--stage=<number>|all] [--temp] [-z] [--stdin] - [--] [<file>]\* + [--] [<file>]* DESCRIPTION ----------- diff --git a/Documentation/git-checkout.txt b/Documentation/git-checkout.txt index 1bacd2e104..f88e9977d1 100644 --- a/Documentation/git-checkout.txt +++ b/Documentation/git-checkout.txt @@ -9,7 +9,7 @@ SYNOPSIS -------- [verse] 'git checkout' [-q] [-f] [-m] [<branch>] -'git checkout' [-q] [-f] [-m] [[-b|--orphan] <new_branch>] [<start_point>] +'git checkout' [-q] [-f] [-m] [[-b|-B|--orphan] <new_branch>] [<start_point>] 'git checkout' [-f|--ours|--theirs|-m|--conflict=<style>] [<tree-ish>] [--] <paths>... 'git checkout' --patch [<tree-ish>] [--] [<paths>...] @@ -21,7 +21,7 @@ also update `HEAD` to set the specified branch as the current branch. 'git checkout' [<branch>]:: -'git checkout' -b <new branch> [<start point>]:: +'git checkout' -b|-B <new_branch> [<start point>]:: This form switches branches by updating the index, working tree, and HEAD to reflect the specified branch. @@ -31,6 +31,17 @@ were called and then checked out; in this case you can use the `--track` or `--no-track` options, which will be passed to 'git branch'. As a convenience, `--track` without `-b` implies branch creation; see the description of `--track` below. ++ +If `-B` is given, <new_branch> is created if it doesn't exist; otherwise, it +is reset. 
This is the transactional equivalent of ++ +------------ +$ git branch -f <branch> [<start point>] +$ git checkout <branch> +------------ ++ +that is to say, the branch is not reset/created unless "git checkout" is +successful. 'git checkout' [--patch] [<tree-ish>] [--] <pathspec>...:: @@ -75,6 +86,12 @@ entries; instead, unmerged entries are ignored. Create a new branch named <new_branch> and start it at <start_point>; see linkgit:git-branch[1] for details. +-B:: + Creates the branch <new_branch> and starts it at <start_point>; + if it already exists, then resets it to <start_point>. This is + equivalent to running "git branch" with "-f"; see + linkgit:git-branch[1] for details. + -t:: --track:: When creating a new branch, set up "upstream" configuration. See @@ -170,7 +187,7 @@ As a special case, the `"@\{-N\}"` syntax for the N-th last branch checks out the branch (instead of detaching). You may also specify `-` which is synonymous with `"@\{-1\}"`. + -As a further special case, you may use `"A...B"` as a shortcut for the +As a further special case, you may use `"A\...B"` as a shortcut for the merge base of `A` and `B` if there is exactly one merge base. You can leave out at most one of `A` and `B`, in which case it defaults to `HEAD`. diff --git a/Documentation/git-clean.txt b/Documentation/git-clean.txt index a81cb6c280..60e38e6e27 100644 --- a/Documentation/git-clean.txt +++ b/Documentation/git-clean.txt @@ -8,7 +8,7 @@ git-clean - Remove untracked files from the working tree SYNOPSIS -------- [verse] -'git clean' [-d] [-f] [-n] [-q] [-x | -X] [--] <path>... +'git clean' [-d] [-f] [-n] [-q] [-e <pattern>] [-x | -X] [--] <path>... DESCRIPTION ----------- @@ -45,6 +45,12 @@ OPTIONS Be quiet, only report errors, but not the files that are successfully removed. +-e <pattern>:: +--exclude=<pattern>:: + Specify special exceptions to not be cleaned. Each <pattern> is + the same form as in $GIT_DIR/info/excludes and this option can be + given multiple times. 
+ -x:: Don't use the ignore rules. This allows removing all untracked files, including build products. This can be used (possibly in diff --git a/Documentation/git-commit-tree.txt b/Documentation/git-commit-tree.txt index 61888547a1..349366ee1e 100644 --- a/Documentation/git-commit-tree.txt +++ b/Documentation/git-commit-tree.txt @@ -8,7 +8,7 @@ git-commit-tree - Create a new commit object SYNOPSIS -------- -'git commit-tree' <tree> [-p <parent commit>]\* < changelog +'git commit-tree' <tree> [-p <parent commit>]* < changelog DESCRIPTION ----------- diff --git a/Documentation/git-fast-export.txt b/Documentation/git-fast-export.txt index 98ec6b5871..fcad113276 100644 --- a/Documentation/git-fast-export.txt +++ b/Documentation/git-fast-export.txt @@ -90,10 +90,16 @@ marks the same across runs. resulting stream can only be used by a repository which already contains the necessary objects. +--full-tree:: + This option will cause fast-export to issue a "deleteall" + directive for each commit followed by a full list of all files + in the commit (as opposed to just listing the files which are + different from the commit's first parent). + [git-rev-list-args...]:: A list of arguments, acceptable to 'git rev-parse' and 'git rev-list', that specifies the specific objects and references - to export. For example, `master\~10..master` causes the + to export. For example, `master{tilde}10..master` causes the current master reference to be exported along with all objects added since its 10th ancestor commit. diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 77a0a2481a..966ba4f213 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -482,9 +482,11 @@ External data format:: 'M' SP <mode> SP <dataref> SP <path> LF .... 
+ -Here `<dataref>` can be either a mark reference (`:<idnum>`) +Here usually `<dataref>` must be either a mark reference (`:<idnum>`) set by a prior `blob` command, or a full 40-byte SHA-1 of an -existing Git blob object. +existing Git blob object. If `<mode>` is `040000` then +`<dataref>` must be the full 40-byte SHA-1 of an existing +Git tree object or a mark reference set with `--import-marks`. Inline data format:: The data content for the file has not been supplied yet. @@ -509,6 +511,8 @@ in octal. Git only supports the following modes: * `160000`: A gitlink, SHA-1 of the object refers to a commit in another repository. Git links can only be specified by SHA or through a commit mark. They are used to implement submodules. +* `040000`: A subdirectory. Subdirectories can only be specified by + SHA or through a tree mark set with `--import-marks`. In both formats `<path>` is the complete path of the file to be added (if not already existing) or modified (if already existing). diff --git a/Documentation/git-fmt-merge-msg.txt b/Documentation/git-fmt-merge-msg.txt index a585dbe898..302f56b889 100644 --- a/Documentation/git-fmt-merge-msg.txt +++ b/Documentation/git-fmt-merge-msg.txt @@ -9,8 +9,8 @@ git-fmt-merge-msg - Produce a merge commit message SYNOPSIS -------- [verse] -'git fmt-merge-msg' [--log | --no-log] <$GIT_DIR/FETCH_HEAD -'git fmt-merge-msg' [--log | --no-log] -F <file> +'git fmt-merge-msg' [-m <message>] [--log | --no-log] <$GIT_DIR/FETCH_HEAD +'git fmt-merge-msg' [-m <message>] [--log | --no-log] -F <file> DESCRIPTION ----------- @@ -38,6 +38,11 @@ OPTIONS Synonyms to --log and --no-log; these are deprecated and will be removed in the future. +-m <message>:: +--message <message>:: + Use <message> instead of the branch names for the first line + of the log message. For use with `--log`. 
+ -F <file>:: --file <file>:: Take the list of merged objects from <file> instead of diff --git a/Documentation/git-for-each-ref.txt b/Documentation/git-for-each-ref.txt index 390d85ccae..d66fd9d231 100644 --- a/Documentation/git-for-each-ref.txt +++ b/Documentation/git-for-each-ref.txt @@ -9,7 +9,7 @@ SYNOPSIS -------- [verse] 'git for-each-ref' [--count=<count>] [--shell|--perl|--python|--tcl] - [--sort=<key>]\* [--format=<format>] [<pattern>...] + [--sort=<key>]* [--format=<format>] [<pattern>...] DESCRIPTION ----------- diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index 5474dd7f94..dab0a78fa8 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -191,11 +191,11 @@ OPTIONS Examples -------- -git grep 'time_t' \-- '*.[ch]':: +git grep {apostrophe}time_t{apostrophe} \-- {apostrophe}*.[ch]{apostrophe}:: Looks for `time_t` in all tracked .c and .h files in the working directory and its subdirectories. -git grep -e \'#define\' --and \( -e MAX_PATH -e PATH_MAX \):: +git grep -e {apostrophe}#define{apostrophe} --and \( -e MAX_PATH -e PATH_MAX \):: Looks for a line that has `#define` and either `MAX_PATH` or `PATH_MAX`. diff --git a/Documentation/git-help.txt b/Documentation/git-help.txt index f8df109d07..eccd0ffd38 100644 --- a/Documentation/git-help.txt +++ b/Documentation/git-help.txt @@ -55,9 +55,9 @@ other display programs (see below). + The web browser can be specified using the configuration variable 'help.browser', or 'web.browser' if the former is not set. If none of -these config variables is set, the 'git web--browse' helper script +these config variables is set, the 'git web{litdd}browse' helper script (called by 'git help') will pick a suitable default. See -linkgit:git-web--browse[1] for more information about this. +linkgit:git-web{litdd}browse[1] for more information about this. 
CONFIGURATION VARIABLES ----------------------- @@ -80,7 +80,7 @@ help.browser, web.browser and browser.<tool>.path The 'help.browser', 'web.browser' and 'browser.<tool>.path' will also be checked if the 'web' format is chosen (either by command line option or configuration variable). See '-w|--web' in the OPTIONS -section above and linkgit:git-web--browse[1]. +section above and linkgit:git-web{litdd}browse[1]. man.viewer ~~~~~~~~~~ diff --git a/Documentation/git-instaweb.txt b/Documentation/git-instaweb.txt index 2c3c4d2994..7477ce8fa8 100644 --- a/Documentation/git-instaweb.txt +++ b/Documentation/git-instaweb.txt @@ -44,20 +44,23 @@ OPTIONS -b:: --browser:: The web browser that should be used to view the gitweb - page. This will be passed to the 'git web--browse' helper + page. This will be passed to the 'git web{litdd}browse' helper script along with the URL of the gitweb instance. See - linkgit:git-web--browse[1] for more information about this. If + linkgit:git-web{litdd}browse[1] for more information about this. If the script fails, the URL will be printed to stdout. +start:: --start:: Start the httpd instance and exit. This does not generate any of the configuration files for spawning a new instance. +stop:: --stop:: Stop the httpd instance and exit. This does not generate any of the configuration files for spawning a new instance, nor does it close the browser. +restart:: --restart:: Restart the httpd instance and exit. This does not generate any of the configuration files for spawning a new instance. @@ -79,7 +82,7 @@ You may specify configuration in your .git/config If the configuration variable 'instaweb.browser' is not set, 'web.browser' will be used instead if it is defined. See -linkgit:git-web--browse[1] for more information about this. +linkgit:git-web{litdd}browse[1] for more information about this. 
Author ------ diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt index bd919f2dfd..15aee2f953 100644 --- a/Documentation/git-ls-files.txt +++ b/Documentation/git-ls-files.txt @@ -10,14 +10,14 @@ SYNOPSIS -------- [verse] 'git ls-files' [-z] [-t] [-v] - (--[cached|deleted|others|ignored|stage|unmerged|killed|modified])\* - (-[c|d|o|i|s|u|k|m])\* + (--[cached|deleted|others|ignored|stage|unmerged|killed|modified])* + (-[c|d|o|i|s|u|k|m])* [-x <pattern>|--exclude=<pattern>] [-X <file>|--exclude-from=<file>] [--exclude-per-directory=<file>] [--exclude-standard] [--error-unmatch] [--with-tree=<tree-ish>] - [--full-name] [--abbrev] [--] [<file>]\* + [--full-name] [--abbrev] [--] [<file>]* DESCRIPTION ----------- @@ -140,6 +140,12 @@ a space) at the start of each line: lines, show only a partial prefix. Non default number of digits can be specified with --abbrev=<n>. +--debug:: + After each line that describes a file, add more data about its + cache entry. This is intended to show as much information as + possible for manual inspection; the exact format may change at + any time. + \--:: Do not interpret any more arguments as options. diff --git a/Documentation/git-merge-base.txt b/Documentation/git-merge-base.txt index ce5b369985..eedef1bb1a 100644 --- a/Documentation/git-merge-base.txt +++ b/Documentation/git-merge-base.txt @@ -8,7 +8,9 @@ git-merge-base - Find as good common ancestors as possible for a merge SYNOPSIS -------- -'git merge-base' [-a|--all] <commit> <commit>... +[verse] +'git merge-base' [-a|--all] [--octopus] <commit> <commit>... +'git merge-base' --independent <commit>... DESCRIPTION ----------- @@ -20,12 +22,12 @@ that does not have any better common ancestor is a 'best common ancestor', i.e. a 'merge base'. Note that there can be more than one merge base for a pair of commits. 
-Among the two commits to compute the merge base from, one is specified by -the first commit argument on the command line; the other commit is a -(possibly hypothetical) commit that is a merge across all the remaining -commits on the command line. As the most common special case, specifying only -two commits on the command line means computing the merge base between -the given two commits. +Unless `--octopus` is given, among the two commits to compute the merge +base from, one is specified by the first commit argument on the command +line; the other commit is a (possibly hypothetical) commit that is a merge +across all the remaining commits on the command line. As the most common +special case, specifying only two commits on the command line means +computing the merge base between the given two commits. As a consequence, the 'merge base' is not necessarily contained in each of the commit arguments if more than two commits are specified. This is different @@ -37,6 +39,18 @@ OPTIONS --all:: Output all merge bases for the commits, instead of just one. +--octopus:: + Compute the best common ancestors of all supplied commits, + in preparation for an n-way merge. This mimics the behavior + of 'git show-branch --merge-base'. + +--independent:: + Instead of printing merge bases, print a minimal subset of + the supplied commits with the same ancestors. In other words, + among the commits given, list those which cannot be reached + from any other. This mimics the behavior of 'git show-branch + --independent'. + DISCUSSION ---------- @@ -96,6 +110,12 @@ Documentation -------------- Documentation by David Greaves, Junio C Hamano and the git-list <git@vger.kernel.org>. 
+See also +-------- +linkgit:git-rev-list[1], +linkgit:git-show-branch[1], +linkgit:git-merge[1] + GIT --- Part of the linkgit:git[1] suite diff --git a/Documentation/git-merge-index.txt b/Documentation/git-merge-index.txt index 4d266de9cc..921b38f183 100644 --- a/Documentation/git-merge-index.txt +++ b/Documentation/git-merge-index.txt @@ -8,7 +8,7 @@ git-merge-index - Run a merge for files needing merging SYNOPSIS -------- -'git merge-index' [-o] [-q] <merge-program> (-a | [--] <file>\*) +'git merge-index' [-o] [-q] <merge-program> (-a | [--] <file>*) DESCRIPTION ----------- diff --git a/Documentation/git-mergetool--lib.txt b/Documentation/git-mergetool--lib.txt index 78eb03f0ae..d8df55362c 100644 --- a/Documentation/git-mergetool--lib.txt +++ b/Documentation/git-mergetool--lib.txt @@ -1,5 +1,5 @@ -git-mergetool--lib(1) -===================== +git-mergetool{litdd}lib(1) +========================== NAME ---- @@ -16,11 +16,11 @@ This is not a command the end user would want to run. Ever. This documentation is meant for people who are studying the Porcelain-ish scripts and/or are writing new ones. -The 'git-mergetool--lib' scriptlet is designed to be sourced (using +The 'git-mergetool{litdd}lib' scriptlet is designed to be sourced (using `.`) by other shell scripts to set up functions for working with git merge tools. -Before sourcing 'git-mergetool--lib', your script must set `TOOL_MODE` +Before sourcing 'git-mergetool{litdd}lib', your script must set `TOOL_MODE` to define the operation mode for the functions listed below. 'diff' and 'merge' are valid values. 
diff --git a/Documentation/git-push.txt b/Documentation/git-push.txt index 658ff2ff67..020955ff5a 100644 --- a/Documentation/git-push.txt +++ b/Documentation/git-push.txt @@ -200,7 +200,7 @@ summary:: For a successfully pushed ref, the summary shows the old and new values of the ref in a form suitable for using as an argument to `git log` (this is `<old>..<new>` in most cases, and - `<old>...<new>` for forced non-fast-forward updates). + `<old>\...<new>` for forced non-fast-forward updates). + For a failed update, more details are given: + diff --git a/Documentation/git-rebase.txt b/Documentation/git-rebase.txt index be23ad2359..30e5c0eb14 100644 --- a/Documentation/git-rebase.txt +++ b/Documentation/git-rebase.txt @@ -199,6 +199,9 @@ rebase.stat:: Whether to show a diffstat of what changed upstream since the last rebase. False by default. +rebase.autosquash:: + If set to true enable '--autosquash' option by default. + OPTIONS ------- <newbase>:: @@ -207,7 +210,7 @@ OPTIONS <upstream>. May be any valid commit, and not just an existing branch name. + -As a special case, you may use "A...B" as a shortcut for the +As a special case, you may use "A\...B" as a shortcut for the merge base of A and B if there is exactly one merge base. You can leave out at most one of A and B, in which case it defaults to HEAD. @@ -250,6 +253,13 @@ on top of the <upstream> branch using the given strategy, using the 'ours' strategy simply discards all patches from the <branch>, which makes little sense. +-X <strategy-option>:: +--strategy-option=<strategy-option>:: + Pass the <strategy-option> through to the merge strategy. + This implies `\--merge` and, if no strategy has been + specified, `-s recursive`. Note the reversal of 'ours' and + 'theirs' as noted above for the `-m` option. + -q:: --quiet:: Be quiet. Implies --no-stat. @@ -326,6 +336,7 @@ idea unless you know what you are doing (see BUGS below). instead.
--autosquash:: +--no-autosquash:: When the commit log message begins with "squash! ..." (or "fixup! ..."), and there is a commit whose title begins with the same ..., automatically modify the todo list of rebase -i @@ -334,6 +345,10 @@ idea unless you know what you are doing (see BUGS below). commit from `pick` to `squash` (or `fixup`). + This option is only valid when the '--interactive' option is used. ++ +If the '--autosquash' option is enabled by default using the +configuration variable `rebase.autosquash`, this option can be +used to override and disable this setting. --no-ff:: With --interactive, cherry-pick all rebased commits instead of @@ -459,6 +474,30 @@ sure that the current HEAD is "B", and call $ git rebase -i -p --onto Q O ----------------------------- +Reordering and editing commits usually creates untested intermediate +steps. You may want to check that your history editing did not break +anything by running a test, or at least recompiling at intermediate +points in history by using the "exec" command (shortcut "x"). You may +do so by creating a todo list like this one: + +------------------------------------------- +pick deadbee Implement feature XXX +fixup f1a5c00 Fix to feature XXX +exec make +pick c0ffeee The oneline of the next commit +edit deadbab The oneline of the commit after +exec cd subdir; make test +... +------------------------------------------- + +The interactive rebase will stop when a command fails (i.e. exits with +non-0 status) to give you an opportunity to fix the problem. You can +continue with `git rebase --continue`. + +The "exec" command launches the command in a shell (the one specified +in `$SHELL`, or the default shell if `$SHELL` is not set), so you can +use shell features (like "cd", ">", ";" ...). The command is run from +the root of the working tree. 
SPLITTING COMMITS ----------------- diff --git a/Documentation/git-relink.txt b/Documentation/git-relink.txt index 25ff8f9dcb..8a5842bb93 100644 --- a/Documentation/git-relink.txt +++ b/Documentation/git-relink.txt @@ -7,7 +7,7 @@ git-relink - Hardlink common objects in local repositories SYNOPSIS -------- -'git relink' [--safe] <dir> [<dir>]\* <master_dir> +'git relink' [--safe] <dir> [<dir>]* <master_dir> DESCRIPTION ----------- diff --git a/Documentation/git-rev-parse.txt b/Documentation/git-rev-parse.txt index be4c053360..341ca90c6e 100644 --- a/Documentation/git-rev-parse.txt +++ b/Documentation/git-rev-parse.txt @@ -74,7 +74,7 @@ OPTIONS properly quoted for consumption by shell. Useful when you expect your parameter to contain whitespaces and newlines (e.g. when using pickaxe `-S` with - 'git diff-\*'). In contrast to the `--sq-quote` option, + 'git diff-{asterisk}'). In contrast to the `--sq-quote` option, the command input is still interpreted as usual. --not:: @@ -112,14 +112,15 @@ OPTIONS + If a `pattern` is given, only refs matching the given shell glob are shown. If the pattern does not contain a globbing character (`?`, -`\*`, or `[`), it is turned into a prefix match by appending `/\*`. +`{asterisk}`, or `[`), it is turned into a prefix match by +appending `/{asterisk}`. --glob=pattern:: Show all refs matching the shell glob pattern `pattern`. If the pattern does not start with `refs/`, this is automatically prepended. If the pattern does not contain a globbing - character (`?`, `\*`, or `[`), it is turned into a prefix - match by appending `/\*`. + character (`?`, `{asterisk}`, or `[`), it is turned into a prefix + match by appending `/{asterisk}`. --show-toplevel:: Show the absolute path of the top-level directory. 
diff --git a/Documentation/git-rm.txt b/Documentation/git-rm.txt index c21d19e573..71e3d9fc23 100644 --- a/Documentation/git-rm.txt +++ b/Documentation/git-rm.txt @@ -78,7 +78,8 @@ a file that you have not told git about does not remove that file. File globbing matches across directory boundaries. Thus, given two directories `d` and `d2`, there is a difference between -using `git rm \'d\*\'` and `git rm \'d/\*\'`, as the former will +using `git rm {apostrophe}d{asterisk}{apostrophe}` and +`git rm {apostrophe}d/{asterisk}{apostrophe}`, as the former will also remove all of directory `d2`. REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM @@ -135,11 +136,11 @@ git diff --name-only --diff-filter=D -z | xargs -0 git rm --cached EXAMPLES -------- -git rm Documentation/\\*.txt:: - Removes all `\*.txt` files from the index that are under the +git rm Documentation/\*.txt:: + Removes all `*.txt` files from the index that are under the `Documentation` directory and any of its subdirectories. + -Note that the asterisk `\*` is quoted from the shell in this +Note that the asterisk `*` is quoted from the shell in this example; this lets git, and not the shell, expand the pathnames of files and subdirectories under the `Documentation/` directory. diff --git a/Documentation/git-show-branch.txt b/Documentation/git-show-branch.txt index 81ba29669c..6453263340 100644 --- a/Documentation/git-show-branch.txt +++ b/Documentation/git-show-branch.txt @@ -168,10 +168,10 @@ $ git show-branch master fixes mhf ------------------------------------------------ These three branches all forked from a common commit, [master], -whose commit message is "Add \'git show-branch\'". The "fixes" -branch adds one commit "Introduce "reset type" flag to "git reset"". -The "mhf" branch adds many other commits. The current branch -is "master". +whose commit message is "Add {apostrophe}git show-branch{apostrophe}". +The "fixes" branch adds one commit "Introduce "reset type" flag to +"git reset"". 
The "mhf" branch adds many other commits. +The current branch is "master". EXAMPLE diff --git a/Documentation/git-show-ref.txt b/Documentation/git-show-ref.txt index 75780d7d63..4696af7433 100644 --- a/Documentation/git-show-ref.txt +++ b/Documentation/git-show-ref.txt @@ -73,8 +73,8 @@ OPTIONS --exclude-existing[=<pattern>]:: Make 'git show-ref' act as a filter that reads refs from stdin of the - form "^(?:<anything>\s)?<refname>(?:\^\{\})?$" and performs the - following actions on each: + form "^(?:<anything>\s)?<refname>(?:{backslash}{caret}\{\})?$" + and performs the following actions on each: (1) strip "^{}" at the end of line if any; (2) ignore if pattern is provided and does not head-match refname; (3) warn if refname is not a well-formed refname and skip; diff --git a/Documentation/git-stash.txt b/Documentation/git-stash.txt index 473889a660..8728f7a514 100644 --- a/Documentation/git-stash.txt +++ b/Documentation/git-stash.txt @@ -104,18 +104,22 @@ tree's changes, but also the index's ones. However, this can fail, when you have conflicts (which are stored in the index, where you therefore can no longer apply the changes as they were originally). + -When no `<stash>` is given, `stash@\{0}` is assumed. +When no `<stash>` is given, `stash@\{0}` is assumed, otherwise `<stash>` must +be a reference of the form `stash@\{<revision>}`. apply [--index] [-q|--quiet] [<stash>]:: - Like `pop`, but do not remove the state from the stash list. + Like `pop`, but do not remove the state from the stash list. Unlike `pop`, + `<stash>` may be any commit that looks like a commit created by + `stash save` or `stash create`. branch <branchname> [<stash>]:: Creates and checks out a new branch named `<branchname>` starting from the commit at which the `<stash>` was originally created, applies the - changes recorded in `<stash>` to the new working tree and index, then - drops the `<stash>` if that completes successfully. 
When no `<stash>` + changes recorded in `<stash>` to the new working tree and index. + If that succeeds, and `<stash>` is a reference of the form + `stash@{<revision>}`, it then drops the `<stash>`. When no `<stash>` is given, applies the latest one. + This is useful if the branch on which you ran `git stash save` has @@ -132,7 +136,9 @@ clear:: drop [-q|--quiet] [<stash>]:: Remove a single stashed state from the stash list. When no `<stash>` - is given, it removes the latest one. i.e. `stash@\{0}` + is given, it removes the latest one. i.e. `stash@\{0}`, otherwise + `<stash>` must a valid stash log reference of the form + `stash@\{<revision>}`. create:: diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt index 2fd054c104..dae190a5f2 100644 --- a/Documentation/git-status.txt +++ b/Documentation/git-status.txt @@ -55,7 +55,11 @@ specified. --ignore-submodules[=<when>]:: Ignore changes to submodules when looking for changes. <when> can be - either "untracked", "dirty" or "all", which is the default. When + either "none", "untracked", "dirty" or "all", which is the default. + Using "none" will consider the submodule modified when it either contains + untracked or modified files or its HEAD differs from the commit recorded + in the superproject and can be used to override any settings of the + 'ignore' option in linkgit:git-config[1] or linkgit:gitmodules[5]. When "untracked" is used submodules are not considered dirty when they only contain untracked content (but they are still scanned for modified content). Using "dirty" ignores all changes to the work tree of submodules, diff --git a/Documentation/git-svn.txt b/Documentation/git-svn.txt index b09bd9761f..4b84d08fc8 100644 --- a/Documentation/git-svn.txt +++ b/Documentation/git-svn.txt @@ -646,6 +646,12 @@ svn.brokenSymlinkWorkaround:: revision fetched. If unset, 'git svn' assumes this option to be "true". +svn.pathnameencoding:: + This instructs git svn to recode pathnames to a given encoding. 
+ It can be used by windows users and by those who work in non-utf8 + locales to avoid corrupted file names with non-ASCII characters. + Valid encodings are the ones supported by Perl's Encode module. + Since the noMetadata, rewriteRoot, rewriteUUID, useSvnsyncProps and useSvmProps options all affect the metadata generated and used by 'git svn'; they *must* be set in the configuration file before any history is imported diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt index 765d4b312e..74d1d49dbf 100644 --- a/Documentation/git-update-index.txt +++ b/Documentation/git-update-index.txt @@ -12,7 +12,7 @@ SYNOPSIS 'git update-index' [--add] [--remove | --force-remove] [--replace] [--refresh] [-q] [--unmerged] [--ignore-missing] - [--cacheinfo <mode> <object> <file>]\* + [--cacheinfo <mode> <object> <file>]* [--chmod=(+|-)x] [--assume-unchanged | --no-assume-unchanged] [--skip-worktree | --no-skip-worktree] @@ -21,7 +21,7 @@ SYNOPSIS [--info-only] [--index-info] [-z] [--stdin] [--verbose] - [--] [<file>]\* + [--] [<file>]* DESCRIPTION ----------- diff --git a/Documentation/git-web--browse.txt b/Documentation/git-web--browse.txt index 75720491b2..e1586c78c3 100644 --- a/Documentation/git-web--browse.txt +++ b/Documentation/git-web--browse.txt @@ -1,5 +1,5 @@ -git-web--browse(1) -================== +git-web{litdd}browse(1) +======================= NAME ---- @@ -7,7 +7,7 @@ git-web--browse - git helper script to launch a web browser SYNOPSIS -------- -'git web--browse' [OPTIONS] URL/FILE ... +'git web{litdd}browse' [OPTIONS] URL/FILE ... DESCRIPTION ----------- @@ -71,7 +71,7 @@ browser.<tool>.cmd When the browser, specified by options or configuration variables, is not among the supported ones, then the corresponding 'browser.<tool>.cmd' configuration variable will be looked up. 
If this -variable exists then 'git web--browse' will treat the specified tool +variable exists then 'git web{litdd}browse' will treat the specified tool as a custom command and will use a shell eval to run the command with the URLs passed as arguments. diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 564586b943..e5a27d875e 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -317,6 +317,17 @@ command is "cat"). smudge = cat ------------------------ +For best results, `clean` should not alter its output further if it is +run twice ("clean->clean" should be equivalent to "clean"), and +multiple `smudge` commands should not alter `clean`'s output +("smudge->smudge->clean" should be equivalent to "clean"). See the +section on merging below. + +The "indent" filter is well-behaved in this regard: it will not modify +input that is already correctly indented. In this case, the lack of a +smudge filter means that the clean filter _must_ accept its own output +without modifying it. + Interaction between checkin/checkout attributes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -331,6 +342,29 @@ In the check-out codepath, the blob content is first converted with `text`, and then `ident` and fed to `filter`. +Merging branches with differing checkin/checkout attributes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you have added attributes to a file that cause the canonical +repository format for that file to change, such as adding a +clean/smudge filter or text/eol/ident attributes, merging anything +where the attribute is not in place would normally cause merge +conflicts. + +To prevent these unnecessary merge conflicts, git can be told to run a +virtual check-out and check-in of all three stages of a file when +resolving a three-way merge by setting the `merge.renormalize` +configuration variable. 
This prevents changes caused by check-in +conversion from causing spurious merge conflicts when a converted file +is merged with an unconverted file. + +As long as a "smudge->clean" results in the same output as a "clean" +even on files that are already smudged, this strategy will +automatically resolve all filter-related conflicts. Filters that do +not act in this way may cause additional merge conflicts that must be +resolved manually. + + Generating diff text ~~~~~~~~~~~~~~~~~~~~ @@ -441,6 +475,8 @@ patterns are available: - `cpp` suitable for source code in the C and C++ languages. +- `csharp` suitable for source code in the C# language. + - `html` suitable for HTML/XHTML documents. - `java` suitable for source code in the Java language. diff --git a/Documentation/gitcore-tutorial.txt b/Documentation/gitcore-tutorial.txt index ed3ddc92cb..5e9c5ebba3 100644 --- a/Documentation/gitcore-tutorial.txt +++ b/Documentation/gitcore-tutorial.txt @@ -110,7 +110,7 @@ An 'object' is identified by its 160-bit SHA1 hash, aka 'object name', and a reference to an object is always the 40-byte hex representation of that SHA1 name. The files in the `refs` subdirectory are expected to contain these hex references -(usually with a final `\'\n\'` at the end), and you should thus +(usually with a final `\n` at the end), and you should thus expect to see a number of 41-byte files containing these references in these `refs` subdirectories when you actually start populating your tree. @@ -310,7 +310,7 @@ and this will just output the name of the resulting tree, in this case ---------------- which is another incomprehensible object name. Again, if you want to, -you can use `git cat-file -t 8988d\...` to see that this time the object +you can use `git cat-file -t 8988d...` to see that this time the object is not a "blob" object, but a "tree" object (you can also use `git cat-file` to actually output the raw object contents, but you'll see mainly a binary mess, so that's less interesting). 
@@ -436,8 +436,8 @@ $ git update-index hello (note how we didn't need the `\--add` flag this time, since git knew about the file already). -Note what happens to the different 'git diff-\*' versions here. After -we've updated `hello` in the index, `git diff-files -p` now shows no +Note what happens to the different 'git diff-{asterisk}' versions here. +After we've updated `hello` in the index, `git diff-files -p` now shows no differences, but `git diff-index -p HEAD` still *does* show that the current state is different from the state we committed. In fact, now 'git diff-index' shows the same difference whether we use the `--cached` @@ -494,7 +494,7 @@ and it will show what the last commit (in `HEAD`) actually changed. [NOTE] ============ Here is an ASCII art by Jon Loeliger that illustrates how -various diff-\* commands compare things. +various 'diff-{asterisk}' commands compare things. diff-tree +----+ @@ -958,11 +958,11 @@ $ git show-branch --topo-order --more=1 master mybranch The first two lines indicate that it is showing the two branches and the first line of the commit log message from their top-of-the-tree commits, you are currently on `master` branch -(notice the asterisk `\*` character), and the first column for +(notice the asterisk `{asterisk}` character), and the first column for the later output lines is used to show commits contained in the `master` branch, and the second column for the `mybranch` branch. Three commits are shown along with their log messages. -All of them have non blank characters in the first column (`*` +All of them have non blank characters in the first column (`{asterisk}` shows an ordinary commit on the current branch, `-` is a merge commit), which means they are now part of the `master` branch. 
Only the "Some work" commit has the plus `+` character in the second column, @@ -1092,7 +1092,7 @@ Downloader from http and https URL first obtains the topmost commit object name from the remote site by looking at the specified refname under `repo.git/refs/` directory, and then tries to obtain the -commit object by downloading from `repo.git/objects/xx/xxx\...` +commit object by downloading from `repo.git/objects/xx/xxx...` using the object name of that commit object. Then it reads the commit object to find out its parent commits and the associate tree object; it repeats this process until it gets all the @@ -1420,7 +1420,7 @@ packed, and stores the packed file in `.git/objects/pack` directory. [NOTE] -You will see two files, `pack-\*.pack` and `pack-\*.idx`, +You will see two files, `pack-{asterisk}.pack` and `pack-{asterisk}.idx`, in `.git/objects/pack` directory. They are closely related to each other, and if you ever copy them by hand to a different repository for whatever reason, you should make sure you copy diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index e10fa88b8c..7dc2e8b0bc 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -90,12 +90,12 @@ Patterns have the following format: - Otherwise, git treats the pattern as a shell glob suitable for consumption by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will not match a / in the pathname. - For example, "Documentation/\*.html" matches + For example, "Documentation/{asterisk}.html" matches "Documentation/git.html" but not "Documentation/ppc/ppc.html" or "tools/perf/Documentation/perf.html". - A leading slash matches the beginning of the pathname. - For example, "/*.c" matches "cat-file.c" but not + For example, "/{asterisk}.c" matches "cat-file.c" but not "mozilla-sha1/sha1.c". 
An example: diff --git a/Documentation/gitmodules.txt b/Documentation/gitmodules.txt index 72a13d18e0..bcffd95ada 100644 --- a/Documentation/gitmodules.txt +++ b/Documentation/gitmodules.txt @@ -44,6 +44,21 @@ submodule.<name>.update:: This config option is overridden if 'git submodule update' is given the '--merge' or '--rebase' options. +submodule.<name>.ignore:: + Defines under what circumstances "git status" and the diff family show + a submodule as modified. When set to "all", it will never be considered + modified, "dirty" will ignore all changes to the submodule's work tree and + takes only differences between the HEAD of the submodule and the commit + recorded in the superproject into account. "untracked" will additionally + let submodules with modified tracked files in their work tree show up. + Using "none" (the default when this option is not set) also shows + submodules that have untracked files in their work tree as changed. + If this option is also present in the submodule's entry in .git/config of + the superproject, the setting there will override the one found in + .gitmodules. + Both settings can be overridden on the command line by using the + "--ignore-submodules" option. + EXAMPLES -------- diff --git a/Documentation/howto/revert-branch-rebase.txt b/Documentation/howto/revert-branch-rebase.txt index 8c32da6deb..093c656048 100644 --- a/Documentation/howto/revert-branch-rebase.txt +++ b/Documentation/howto/revert-branch-rebase.txt @@ -112,25 +112,19 @@ $ git tag pu-anchor pu $ git rebase master * Applying: Redo "revert" using three-way merge machinery. First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. * Applying: Remove git-apply-patch-script. First trying simple merge strategy to cherry-pick. Simple cherry-pick fails; trying Automatic cherry-pick. Removing Documentation/git-apply-patch-script.txt Removing git-apply-patch-script -Finished one cherry-pick.
* Applying: Document "git cherry-pick" and "git revert" First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. * Applying: mailinfo and applymbox updates First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. * Applying: Show commits in topo order and name all commits. First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. * Applying: More documentation updates. First trying simple merge strategy to cherry-pick. -Finished one cherry-pick. ------------------------------------------------ The temporary tag 'pu-anchor' is me just being careful, in case 'git diff --git a/Documentation/install-webdoc.sh b/Documentation/install-webdoc.sh index 34d02a2418..37e67d1a14 100755 --- a/Documentation/install-webdoc.sh +++ b/Documentation/install-webdoc.sh @@ -12,7 +12,7 @@ do then : did not match elif test -f "$T/$h" && - $DIFF -u -I'Last updated [0-9][0-9]-[A-Z][a-z][a-z]-' "$T/$h" "$h" + $DIFF -u -I'^Last updated ' "$T/$h" "$h" then :; # up to date else diff --git a/Documentation/merge-config.txt b/Documentation/merge-config.txt index a403155052..b72f533970 100644 --- a/Documentation/merge-config.txt +++ b/Documentation/merge-config.txt @@ -15,6 +15,16 @@ merge.renameLimit:: during a merge; if not specified, defaults to the value of diff.renameLimit. +merge.renormalize:: + Tell git that canonical representation of files in the + repository has changed over time (e.g. earlier commits record + text files with CRLF line endings, but recent ones use LF line + endings). In such a repository, git can convert the data + recorded in commits to a canonical form before performing a + merge to reduce unnecessary conflicts. For more information, + see section "Merging branches with differing checkin/checkout + attributes" in linkgit:gitattributes[5]. + merge.stat:: Whether to print the diffstat between ORIG_HEAD and the merge result at the end of the merge. True by default. 
diff --git a/Documentation/merge-strategies.txt b/Documentation/merge-strategies.txt index a5bc1dbb95..049313d601 100644 --- a/Documentation/merge-strategies.txt +++ b/Documentation/merge-strategies.txt @@ -40,6 +40,18 @@ the other tree did, declaring 'our' history contains all that happened in it. theirs;; This is opposite of 'ours'. +renormalize;; + This runs a virtual check-out and check-in of all three stages + of a file when resolving a three-way merge. This option is + meant to be used when merging branches with different clean + filters or end-of-line normalization rules. See "Merging + branches with differing checkin/checkout attributes" in + linkgit:gitattributes[5] for details. + +no-renormalize;; + Disables the `renormalize` option. This overrides the + `merge.renormalize` configuration variable. + subtree[=path];; This option is a more advanced form of 'subtree' strategy, where the strategy makes a guess on how two trees must be shifted to diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index cc562a057a..e2237ae4a0 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -321,7 +321,7 @@ excluded from the output. reflog entries from the most recent one to older ones. When this option is used you cannot specify commits to exclude (that is, '{caret}commit', 'commit1..commit2', - nor 'commit1...commit2' notations cannot be used). + nor 'commit1\...commit2' notations cannot be used). 
+ With '\--pretty' format other than oneline (for obvious reasons), this causes the output to have two extra lines of information diff --git a/Documentation/technical/api-merge.txt b/Documentation/technical/api-merge.txt new file mode 100644 index 0000000000..a7e050bb7a --- /dev/null +++ b/Documentation/technical/api-merge.txt @@ -0,0 +1,73 @@ +merge API +========= + +The merge API helps a program to reconcile two competing sets of +improvements to some files (e.g., unregistered changes from the work +tree versus changes involved in switching to a new branch), reporting +conflicts if found. The library called through this API is +responsible for a few things. + + * determining which trees to merge (recursive ancestor consolidation); + + * lining up corresponding files in the trees to be merged (rename + detection, subtree shifting), reporting edge cases like add/add + and rename/rename conflicts to the user; + + * performing a three-way merge of corresponding files, taking + path-specific merge drivers (specified in `.gitattributes`) + into account. + +Low-level (single file) merge +----------------------------- + +`ll_merge`:: + + Perform a three-way single-file merge in core. This is + a thin wrapper around `xdl_merge` that takes the path and + any merge backend specified in `.gitattributes` or + `.git/info/attributes` into account. Returns 0 for a + clean merge. + +The caller: + +1. allocates an mmbuffer_t variable for the result; +2. allocates and fills variables with the file's original content + and two modified versions (using `read_mmfile`, for example); +3. calls ll_merge(); +4. reads the output from result_buf.ptr and result_buf.size; +5. releases buffers when finished (free(ancestor.ptr); free(ours.ptr); + free(theirs.ptr); free(result_buf.ptr);). 
+ +If the modifications do not merge cleanly, `ll_merge` will return a +nonzero value and `result_buf` will generally include a description of +the conflict bracketed by markers such as the traditional `<<<<<<<` +and `>>>>>>>`. + +The `ancestor_label`, `our_label`, and `their_label` parameters are +used to label the different sides of a conflict if the merge driver +supports this. + +The `flag` parameter is a bitfield: + + - The `LL_OPT_VIRTUAL_ANCESTOR` bit indicates whether this is an + internal merge to consolidate ancestors for a recursive merge. + + - The `LL_OPT_FAVOR_MASK` bits allow local conflicts to be automatically + resolved in favor of one side or the other (as in 'git merge-file' + `--ours`/`--theirs`/`--union`). + They can be populated by `create_ll_flag`, whose argument can be + `XDL_MERGE_FAVOR_OURS`, `XDL_MERGE_FAVOR_THEIRS`, or + `XDL_MERGE_FAVOR_UNION`. + +Everything else +--------------- + +Talk about <merge-recursive.h> and merge_file(): + + - merge_trees() to merge with rename detection + - merge_recursive() for ancestor consolidation + - try_merge_command() for other strategies + - conflict format + - merge options + +(Daniel, Miklos, Stephan, JC) diff --git a/Documentation/technical/api-parse-options.txt b/Documentation/technical/api-parse-options.txt index 312e3b2e2b..c5d141cd63 100644 --- a/Documentation/technical/api-parse-options.txt +++ b/Documentation/technical/api-parse-options.txt @@ -201,7 +201,7 @@ The last element of the array must be `OPT_END()`. If not stated otherwise, interpret the arguments as follows: * `short` is a character for the short option - (e.g. `\'e\'` for `-e`, use `0` to omit), + (e.g. `{apostrophe}e{apostrophe}` for `-e`, use `0` to omit), * `long` is a string for the long option (e.g. 
`"example"` for `\--example`, use `NULL` to omit), @@ -228,10 +228,10 @@ The function must be defined in this form: The callback mechanism is as follows: * Inside `func`, the only interesting member of the structure - given by `opt` is the void pointer `opt->value`. - `\*opt->value` will be the value that is saved into `var`, if you + given by `opt` is the void pointer `opt\->value`. + `\*opt\->value` will be the value that is saved into `var`, if you use `OPT_CALLBACK()`. - For example, do `*(unsigned long *)opt->value = 42;` to get 42 + For example, do `*(unsigned long *)opt\->value = 42;` to get 42 into an `unsigned long` variable. * Return value `0` indicates success and non-zero return diff --git a/Documentation/technical/api-tree-walking.txt b/Documentation/technical/api-tree-walking.txt index 55b728632c..14af37c3f1 100644 --- a/Documentation/technical/api-tree-walking.txt +++ b/Documentation/technical/api-tree-walking.txt @@ -42,6 +42,8 @@ information. * `data` can be anything the `fn` callback would want to use. +* `show_all_errors` tells whether to stop at the first error or not. + Initializing ------------ diff --git a/Documentation/user-manual.txt b/Documentation/user-manual.txt index 22aee34d4a..fecc4eb5b3 100644 --- a/Documentation/user-manual.txt +++ b/Documentation/user-manual.txt @@ -4251,9 +4251,9 @@ Two things are interesting here: negative numbers in case of different errors--and 0 on success. - the variable `sha1` in the function signature of `get_sha1()` is `unsigned - char \*`, but is actually expected to be a pointer to `unsigned + char {asterisk}`, but is actually expected to be a pointer to `unsigned char[20]`. This variable will contain the 160-bit SHA-1 of the given - commit. Note that whenever a SHA-1 is passed as `unsigned char \*`, it + commit. Note that whenever a SHA-1 is passed as `unsigned char {asterisk}`, it is the binary representation, as opposed to the ASCII representation in hex characters, which is passed as `char *`. 
diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 8efc557847..f6d301a10f 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v1.7.2.3 +DEF_VER=v1.7.2.GIT LF=' ' @@ -68,6 +68,8 @@ all:: # # Define NO_MKSTEMPS if you don't have mkstemps in the C library. # +# Define NO_STRTOK_R if you don't have strtok_r in the C library. +# # Define NO_LIBGEN_H if you don't have libgen.h. # # Define NEEDS_LIBGEN if your libgen needs -lgen when linking @@ -308,6 +310,7 @@ TCL_PATH = tclsh TCLTK_PATH = wish PTHREAD_LIBS = -lpthread PTHREAD_CFLAGS = +GCOV = gcov export TCL_PATH TCLTK_PATH @@ -408,12 +411,17 @@ TEST_PROGRAMS_NEED_X += test-date TEST_PROGRAMS_NEED_X += test-delta TEST_PROGRAMS_NEED_X += test-dump-cache-tree TEST_PROGRAMS_NEED_X += test-genrandom +TEST_PROGRAMS_NEED_X += test-line-buffer TEST_PROGRAMS_NEED_X += test-match-trees +TEST_PROGRAMS_NEED_X += test-obj-pool TEST_PROGRAMS_NEED_X += test-parse-options TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-run-command TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sigchain +TEST_PROGRAMS_NEED_X += test-string-pool +TEST_PROGRAMS_NEED_X += test-svn-fe +TEST_PROGRAMS_NEED_X += test-treap TEST_PROGRAMS_NEED_X += test-index-version TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X)) @@ -468,6 +476,7 @@ export PYTHON_PATH LIB_FILE=libgit.a XDIFF_LIB=xdiff/lib.a +VCSSVN_LIB=vcs-svn/lib.a LIB_H += advice.h LIB_H += archive.h @@ -1035,6 +1044,7 @@ ifeq ($(uname_S),Windows) NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease + NO_STRTOK_R = YesPlease NO_MEMMEM = YesPlease # NEEDS_LIBICONV = YesPlease NO_ICONV = YesPlease @@ -1089,6 +1099,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease + NO_STRTOK_R = YesPlease NO_MEMMEM = YesPlease NEEDS_LIBICONV = YesPlease OLD_ICONV = YesPlease @@ -1319,6 +1330,10 @@ endif ifdef NO_STRTOULL COMPAT_CFLAGS += 
-DNO_STRTOULL endif +ifdef NO_STRTOK_R + COMPAT_CFLAGS += -DNO_STRTOK_R + COMPAT_OBJS += compat/strtok_r.o +endif ifdef NO_SETENV COMPAT_CFLAGS += -DNO_SETENV COMPAT_OBJS += compat/setenv.o @@ -1485,6 +1500,7 @@ ifndef V QUIET_BUILT_IN = @echo ' ' BUILTIN $@; QUIET_GEN = @echo ' ' GEN $@; QUIET_LNCP = @echo ' ' LN/CP $@; + QUIET_GCOV = @echo ' ' GCOV $@; QUIET_SUBDIR0 = +@subdir= QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir @@ -1739,7 +1755,9 @@ ifndef NO_CURL endif XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ xdiff/xmerge.o xdiff/xpatience.o -OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) +VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \ + vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o +OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS) dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d) dep_dirs := $(addsuffix .depend,$(sort $(dir $(OBJECTS)))) @@ -1861,6 +1879,11 @@ http.o http-walker.o http-push.o http-fetch.o remote-curl.o: http.h xdiff-interface.o $(XDIFF_OBJS): \ xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \ xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h + +$(VCSSVN_OBJS): \ + vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \ + vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \ + vcs-svn/svndump.h endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ @@ -1873,12 +1896,16 @@ builtin/init-db.s builtin/init-db.o: EXTRA_CPPFLAGS = \ config.s config.o: EXTRA_CPPFLAGS = -DETC_GITCONFIG='"$(ETC_GITCONFIG_SQ)"' -http.s http.o: EXTRA_CPPFLAGS = -DGIT_USER_AGENT='"git/$(GIT_VERSION)"' +http.s http.o: EXTRA_CPPFLAGS = -DGIT_HTTP_USER_AGENT='"git/$(GIT_VERSION)"' ifdef NO_EXPAT http-walker.s http-walker.o: EXTRA_CPPFLAGS = -DNO_EXPAT endif +ifdef NO_REGEX +compat/regex/regex.o: EXTRA_CPPFLAGS = -DGAWK -DNO_MBSUPPORT +endif + git-%$X: %.o $(GITLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter 
%.o,$^) $(LIBS) @@ -1909,6 +1936,8 @@ $(LIB_FILE): $(LIB_OBJS) $(XDIFF_LIB): $(XDIFF_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS) +$(VCSSVN_LIB): $(VCSSVN_OBJS) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(VCSSVN_OBJS) doc: $(MAKE) -C Documentation all @@ -2007,12 +2036,18 @@ test-date$X: date.o ctype.o test-delta$X: diff-delta.o patch-delta.o +test-line-buffer$X: vcs-svn/lib.a + test-parse-options$X: parse-options.o +test-string-pool$X: vcs-svn/lib.a + +test-svn-fe$X: vcs-svn/lib.a + .PRECIOUS: $(TEST_OBJS) test-%$X: test-%.o $(GITLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS) check-sha1:: test-sha1$X ./test-sha1.sh @@ -2187,8 +2222,8 @@ distclean: clean $(RM) configure clean: - $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o \ - builtin/*.o $(LIB_FILE) $(XDIFF_LIB) + $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o vcs-svn/*.o \ + builtin/*.o $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB) $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X $(RM) $(TEST_PROGRAMS) $(RM) -r bin-wrappers @@ -2291,11 +2326,18 @@ coverage: $(MAKE) coverage-build $(MAKE) coverage-report +object_dirs := $(sort $(dir $(OBJECTS))) coverage-clean: - rm -f *.gcda *.gcno + $(RM) $(addsuffix *.gcov,$(object_dirs)) + $(RM) $(addsuffix *.gcda,$(object_dirs)) + $(RM) $(addsuffix *.gcno,$(object_dirs)) + $(RM) coverage-untested-functions + $(RM) -r cover_db/ + $(RM) -r cover_db_html/ COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov +GCOVFLAGS = --preserve-paths --branch-probabilities --all-blocks coverage-build: coverage-clean $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all @@ -2303,7 +2345,17 @@ coverage-build: coverage-clean -j1 test coverage-report: - gcov -b *.c + $(QUIET_GCOV)for dir in $(object_dirs); do \ + $(GCOV) $(GCOVFLAGS) 
--object-directory=$$dir $$dir*.c || exit; \ + done + +coverage-untested-functions: coverage-report grep '^function.*called 0 ' *.c.gcov \ | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \ - | tee coverage-untested-functions + > coverage-untested-functions + +cover_db: coverage-report + gcov2perl -db cover_db *.gcov + +cover_db_html: cover_db + cover -report html -outputdir cover_db_html cover_db @@ -1 +1 @@ -Documentation/RelNotes-1.7.2.3.txt
\ No newline at end of file +Documentation/RelNotes-1.7.3.txt
\ No newline at end of file @@ -22,6 +22,13 @@ char *alias_lookup(const char *alias) return alias_val; } +#define SPLIT_CMDLINE_BAD_ENDING 1 +#define SPLIT_CMDLINE_UNCLOSED_QUOTE 2 +static const char *split_cmdline_errors[] = { + "cmdline ends with \\", + "unclosed quote" +}; + int split_cmdline(char *cmdline, const char ***argv) { int src, dst, count = 0, size = 16; @@ -53,7 +60,7 @@ int split_cmdline(char *cmdline, const char ***argv) if (!c) { free(*argv); *argv = NULL; - return error("cmdline ends with \\"); + return -SPLIT_CMDLINE_BAD_ENDING; } } cmdline[dst++] = c; @@ -66,7 +73,7 @@ int split_cmdline(char *cmdline, const char ***argv) if (quoted) { free(*argv); *argv = NULL; - return error("unclosed quote"); + return -SPLIT_CMDLINE_UNCLOSED_QUOTE; } ALLOC_GROW(*argv, count+1, size); @@ -75,3 +82,6 @@ int split_cmdline(char *cmdline, const char ***argv) return count; } +const char *split_cmdline_strerror(int split_cmdline_errno) { + return split_cmdline_errors[-split_cmdline_errno-1]; +} @@ -141,7 +141,8 @@ static void show_list(const char *debug, int counted, int nr, enum object_type type; unsigned long size; char *buf = read_sha1_file(commit->object.sha1, &type, &size); - char *ep, *sp; + const char *subject_start; + int subject_len; fprintf(stderr, "%c%c%c ", (flags & TREESAME) ? 
' ' : 'T', @@ -156,13 +157,9 @@ static void show_list(const char *debug, int counted, int nr, fprintf(stderr, " %.*s", 8, sha1_to_hex(pp->item->object.sha1)); - sp = strstr(buf, "\n\n"); - if (sp) { - sp += 2; - for (ep = sp; *ep && *ep != '\n'; ep++) - ; - fprintf(stderr, " %.*s", (int)(ep - sp), sp); - } + subject_len = find_commit_subject(buf, &subject_start); + if (subject_len) + fprintf(stderr, " %.*s", subject_len, subject_start); fprintf(stderr, "\n"); } } @@ -159,7 +159,7 @@ void create_branch(const char *head, dont_change_ref = 1; else if (!force) die("A branch named '%s' already exists.", name); - else if (!is_bare_repository() && !strcmp(head, name)) + else if (!is_bare_repository() && head && !strcmp(head, name)) die("Cannot force update the current branch."); forcing = 1; } diff --git a/builtin/apply.c b/builtin/apply.c index f38c1f7b88..23c18c573b 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -416,48 +416,190 @@ static char *squash_slash(char *name) return name; } -static char *find_name(const char *line, char *def, int p_value, int terminate) +static char *find_name_gnu(const char *line, char *def, int p_value) { - int len; - const char *start = NULL; + struct strbuf name = STRBUF_INIT; + char *cp; - if (p_value == 0) - start = line; + /* + * Proposed "new-style" GNU patch/diff format; see + * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2 + */ + if (unquote_c_style(&name, line, NULL)) { + strbuf_release(&name); + return NULL; + } - if (*line == '"') { - struct strbuf name = STRBUF_INIT; + for (cp = name.buf; p_value; p_value--) { + cp = strchr(cp, '/'); + if (!cp) { + strbuf_release(&name); + return NULL; + } + cp++; + } - /* - * Proposed "new-style" GNU patch/diff format; see - * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2 - */ - if (!unquote_c_style(&name, line, NULL)) { - char *cp; + /* name can later be freed, so we need + * to memmove, not just return cp + */ + strbuf_remove(&name, 0, cp - name.buf); + 
free(def); + if (root) + strbuf_insert(&name, 0, root, root_len); + return squash_slash(strbuf_detach(&name, NULL)); +} - for (cp = name.buf; p_value; p_value--) { - cp = strchr(cp, '/'); - if (!cp) - break; - cp++; - } - if (cp) { - /* name can later be freed, so we need - * to memmove, not just return cp - */ - strbuf_remove(&name, 0, cp - name.buf); - free(def); - if (root) - strbuf_insert(&name, 0, root, root_len); - return squash_slash(strbuf_detach(&name, NULL)); - } - } - strbuf_release(&name); +static size_t tz_len(const char *line, size_t len) +{ + const char *tz, *p; + + if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ') + return 0; + tz = line + len - strlen(" +0500"); + + if (tz[1] != '+' && tz[1] != '-') + return 0; + + for (p = tz + 2; p != line + len; p++) + if (!isdigit(*p)) + return 0; + + return line + len - tz; +} + +static size_t date_len(const char *line, size_t len) +{ + const char *date, *p; + + if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-') + return 0; + p = date = line + len - strlen("72-02-05"); + + if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || + !isdigit(*p++) || !isdigit(*p++)) /* Not a date. */ + return 0; + + if (date - line >= strlen("19") && + isdigit(date[-1]) && isdigit(date[-2])) /* 4-digit year */ + date -= strlen("19"); + + return line + len - date; +} + +static size_t short_time_len(const char *line, size_t len) +{ + const char *time, *p; + + if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':') + return 0; + p = time = line + len - strlen(" 07:01:32"); + + /* Permit 1-digit hours? */ + if (*p++ != ' ' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || + !isdigit(*p++) || !isdigit(*p++)) /* Not a time. 
*/ + return 0; + + return line + len - time; +} + +static size_t fractional_time_len(const char *line, size_t len) +{ + const char *p; + size_t n; + + /* Expected format: 19:41:17.620000023 */ + if (!len || !isdigit(line[len - 1])) + return 0; + p = line + len - 1; + + /* Fractional seconds. */ + while (p > line && isdigit(*p)) + p--; + if (*p != '.') + return 0; + + /* Hours, minutes, and whole seconds. */ + n = short_time_len(line, p - line); + if (!n) + return 0; + + return line + len - p + n; +} + +static size_t trailing_spaces_len(const char *line, size_t len) +{ + const char *p; + + /* Expected format: ' ' x (1 or more) */ + if (!len || line[len - 1] != ' ') + return 0; + + p = line + len; + while (p != line) { + p--; + if (*p != ' ') + return line + len - (p + 1); } - for (;;) { + /* All spaces! */ + return len; +} + +static size_t diff_timestamp_len(const char *line, size_t len) +{ + const char *end = line + len; + size_t n; + + /* + * Posix: 2010-07-05 19:41:17 + * GNU: 2010-07-05 19:41:17.620000023 -0500 + */ + + if (!isdigit(end[-1])) + return 0; + + n = tz_len(line, end - line); + end -= n; + + n = short_time_len(line, end - line); + if (!n) + n = fractional_time_len(line, end - line); + end -= n; + + n = date_len(line, end - line); + if (!n) /* No date. Too bad. */ + return 0; + end -= n; + + if (end == line) /* No space before date. */ + return 0; + if (end[-1] == '\t') { /* Success! */ + end--; + return line + len - end; + } + if (end[-1] != ' ') /* No space before date. */ + return 0; + + /* Whitespace damage. 
*/ + end -= trailing_spaces_len(line, end - line); + return line + len - end; +} + +static char *find_name_common(const char *line, char *def, int p_value, + const char *end, int terminate) +{ + int len; + const char *start = NULL; + + if (p_value == 0) + start = line; + while (line != end) { char c = *line; - if (isspace(c)) { + if (!end && isspace(c)) { if (c == '\n') break; if (name_terminate(start, line-start, c, terminate)) @@ -497,6 +639,37 @@ static char *find_name(const char *line, char *def, int p_value, int terminate) return squash_slash(xmemdupz(start, len)); } +static char *find_name(const char *line, char *def, int p_value, int terminate) +{ + if (*line == '"') { + char *name = find_name_gnu(line, def, p_value); + if (name) + return name; + } + + return find_name_common(line, def, p_value, NULL, terminate); +} + +static char *find_name_traditional(const char *line, char *def, int p_value) +{ + size_t len = strlen(line); + size_t date_len; + + if (*line == '"') { + char *name = find_name_gnu(line, def, p_value); + if (name) + return name; + } + + len = strchrnul(line, '\n') - line; + date_len = diff_timestamp_len(line, len); + if (!date_len) + return find_name_common(line, def, p_value, NULL, TERM_TAB); + len -= date_len; + + return find_name_common(line, def, p_value, line + len, 0); +} + static int count_slashes(const char *cp) { int cnt = 0; @@ -519,7 +692,7 @@ static int guess_p_value(const char *nameline) if (is_dev_null(nameline)) return -1; - name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB); + name = find_name_traditional(nameline, NULL, 0); if (!name) return -1; cp = strchr(name, '/'); @@ -638,16 +811,16 @@ static void parse_traditional_patch(const char *first, const char *second, struc if (is_dev_null(first)) { patch->is_new = 1; patch->is_delete = 0; - name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB); + name = find_name_traditional(second, NULL, p_value); patch->new_name = name; } else if (is_dev_null(second)) { 
patch->is_new = 0; patch->is_delete = 1; - name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB); + name = find_name_traditional(first, NULL, p_value); patch->old_name = name; } else { - name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB); - name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB); + name = find_name_traditional(first, NULL, p_value); + name = find_name_traditional(second, name, p_value); if (has_epoch_timestamp(first)) { patch->is_new = 1; patch->is_delete = 0; @@ -3606,11 +3779,11 @@ static int option_parse_directory(const struct option *opt, return 0; } -int cmd_apply(int argc, const char **argv, const char *unused_prefix) +int cmd_apply(int argc, const char **argv, const char *prefix_) { int i; int errs = 0; - int is_not_gitdir; + int is_not_gitdir = !startup_info->have_repository; int binary; int force_apply = 0; @@ -3683,7 +3856,7 @@ int cmd_apply(int argc, const char **argv, const char *unused_prefix) OPT_END() }; - prefix = setup_git_directory_gently(&is_not_gitdir); + prefix = prefix_; prefix_length = prefix ? 
strlen(prefix) : 0; git_config(git_apply_config, NULL); if (apply_default_whitespace) diff --git a/builtin/blame.c b/builtin/blame.c index 28e3be2ead..101535448f 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1407,7 +1407,8 @@ static void get_commit_info(struct commit *commit, int detailed) { int len; - char *tmp, *endp, *reencoded, *message; + const char *subject; + char *reencoded, *message; static char author_name[1024]; static char author_mail[1024]; static char committer_name[1024]; @@ -1449,22 +1450,13 @@ static void get_commit_info(struct commit *commit, &ret->committer_time, &ret->committer_tz); ret->summary = summary_buf; - tmp = strstr(message, "\n\n"); - if (!tmp) { - error_out: + len = find_commit_subject(message, &subject); + if (len && len < sizeof(summary_buf)) { + memcpy(summary_buf, subject, len); + summary_buf[len] = 0; + } else { sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1)); - free(reencoded); - return; } - tmp += 2; - endp = strchr(tmp, '\n'); - if (!endp) - endp = tmp + strlen(tmp); - len = endp - tmp; - if (len >= sizeof(summary_buf) || len == 0) - goto error_out; - memcpy(summary_buf, tmp, len); - summary_buf[len] = 0; free(reencoded); } diff --git a/builtin/bundle.c b/builtin/bundle.c index 2006cc5cd5..80649ba0b2 100644 --- a/builtin/bundle.c +++ b/builtin/bundle.c @@ -18,7 +18,6 @@ static const char builtin_bundle_usage[] = int cmd_bundle(int argc, const char **argv, const char *prefix) { struct bundle_header header; - int nongit; const char *cmd, *bundle_file; int bundle_fd = -1; char buffer[PATH_MAX]; @@ -31,7 +30,6 @@ int cmd_bundle(int argc, const char **argv, const char *prefix) argc -= 2; argv += 2; - prefix = setup_git_directory_gently(&nongit); if (prefix && bundle_file[0] != '/') { snprintf(buffer, sizeof(buffer), "%s/%s", prefix, bundle_file); bundle_file = buffer; @@ -54,11 +52,11 @@ int cmd_bundle(int argc, const char **argv, const char *prefix) return !!list_bundle_refs(&header, argc, argv); } if 
(!strcmp(cmd, "create")) { - if (nongit) + if (!startup_info->have_repository) die("Need a repository to create a bundle."); return !!create_bundle(&header, bundle_file, argc, argv); } else if (!strcmp(cmd, "unbundle")) { - if (nongit) + if (!startup_info->have_repository) die("Need a repository to unbundle."); return !!unbundle(&header, bundle_fd) || list_bundle_refs(&header, argc, argv); diff --git a/builtin/checkout.c b/builtin/checkout.c index eef2b48d98..560eae1715 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -18,6 +18,7 @@ #include "xdiff-interface.h" #include "ll-merge.h" #include "resolve-undo.h" +#include "submodule.h" static const char * const checkout_usage[] = { "git checkout [options] <branch>", @@ -32,10 +33,15 @@ struct checkout_opts { int writeout_stage; int writeout_error; + /* not set by parse_options */ + int branch_exists; + const char *new_branch; + const char *new_branch_force; const char *new_orphan_branch; int new_branch_log; enum branch_track track; + struct diff_options diff_options; }; static int post_checkout_hook(struct commit *old, struct commit *new, @@ -150,6 +156,10 @@ static int checkout_merged(int pos, struct checkout *state) read_mmblob(&ours, active_cache[pos+1]->sha1); read_mmblob(&theirs, active_cache[pos+2]->sha1); + /* + * NEEDSWORK: re-create conflicts from merges with + * merge.renormalize set, too + */ status = ll_merge(&result_buf, path, &ancestor, "base", &ours, "ours", &theirs, "theirs", 0); free(ancestor.ptr); @@ -274,11 +284,12 @@ static int checkout_paths(struct tree *source_tree, const char **pathspec, return errs; } -static void show_local_changes(struct object *head) +static void show_local_changes(struct object *head, struct diff_options *opts) { struct rev_info rev; /* I think we want full paths, even if we're in a subdirectory. 
*/ init_revisions(&rev, NULL); + rev.diffopt.flags = opts->flags; rev.diffopt.output_format |= DIFF_FORMAT_NAME_STATUS; if (diff_setup_done(&rev.diffopt) < 0) die("diff_setup_done failed"); @@ -372,7 +383,7 @@ static int merge_working_tree(struct checkout_opts *opts, topts.src_index = &the_index; topts.dst_index = &the_index; - topts.msgs.not_uptodate_file = "You have local changes to '%s'; cannot switch branches."; + setup_unpack_trees_porcelain(&topts, "checkout"); refresh_cache(REFRESH_QUIET); @@ -432,6 +443,13 @@ static int merge_working_tree(struct checkout_opts *opts, */ add_files_to_cache(NULL, NULL, 0); + /* + * NEEDSWORK: carrying over local changes + * when branches have different end-of-line + * normalization (or clean+smudge rules) is + * a pain; plumb in an option to set + * o.renormalize? + */ init_merge_options(&o); o.verbosity = 0; work = write_tree_from_memory(&o); @@ -455,7 +473,7 @@ static int merge_working_tree(struct checkout_opts *opts, die("unable to write new index file"); if (!opts->force && !opts->quiet) - show_local_changes(&new->commit->object); + show_local_changes(&new->commit->object, &opts->diff_options); return 0; } @@ -510,7 +528,8 @@ static void update_refs_for_switch(struct checkout_opts *opts, } } else - create_branch(old->name, opts->new_branch, new->name, 0, + create_branch(old->name, opts->new_branch, new->name, + opts->new_branch_force ? 1 : 0, opts->new_branch_log, opts->track); new->name = opts->new_branch; setup_branch_path(new); @@ -528,9 +547,12 @@ static void update_refs_for_switch(struct checkout_opts *opts, if (old->path && !strcmp(new->path, old->path)) fprintf(stderr, "Already on '%s'\n", new->name); - else + else if (opts->new_branch) fprintf(stderr, "Switched to%s branch '%s'\n", - opts->new_branch ? " a new" : "", + opts->branch_exists ? 
" and reset" : " a new", + new->name); + else + fprintf(stderr, "Switched to branch '%s'\n", new->name); } if (old->path && old->name) { @@ -599,7 +621,16 @@ static int switch_branches(struct checkout_opts *opts, struct branch_info *new) static int git_checkout_config(const char *var, const char *value, void *cb) { - return git_xmerge_config(var, value, cb); + if (!strcmp(var, "diff.ignoresubmodules")) { + struct checkout_opts *opts = cb; + handle_ignore_submodules_arg(&opts->diff_options, value); + return 0; + } + + if (!prefixcmp(var, "submodule.")) + return parse_submodule_config_option(var, value); + + return git_xmerge_config(var, value, NULL); } static int interactive_checkout(const char *revision, const char **pathspec, @@ -656,7 +687,10 @@ int cmd_checkout(int argc, const char **argv, const char *prefix) int dwim_new_local_branch = 1; struct option options[] = { OPT__QUIET(&opts.quiet), - OPT_STRING('b', NULL, &opts.new_branch, "new branch", "branch"), + OPT_STRING('b', NULL, &opts.new_branch, "branch", + "create and checkout a new branch"), + OPT_STRING('B', NULL, &opts.new_branch_force, "branch", + "create/reset and checkout a branch"), OPT_BOOLEAN('l', NULL, &opts.new_branch_log, "log for new branch"), OPT_SET_INT('t', "track", &opts.track, "track", BRANCH_TRACK_EXPLICIT), @@ -680,13 +714,22 @@ int cmd_checkout(int argc, const char **argv, const char *prefix) memset(&opts, 0, sizeof(opts)); memset(&new, 0, sizeof(new)); - git_config(git_checkout_config, NULL); + gitmodules_config(); + git_config(git_checkout_config, &opts); opts.track = BRANCH_TRACK_UNSPECIFIED; argc = parse_options(argc, argv, prefix, options, checkout_usage, PARSE_OPT_KEEP_DASHDASH); + /* we can assume from now on new_branch = !new_branch_force */ + if (opts.new_branch && opts.new_branch_force) + die("-B cannot be used with -b"); + + /* copy -B over to -b, so that we can just check the latter */ + if (opts.new_branch_force) + opts.new_branch = opts.new_branch_force; + if (patch_mode && 
(opts.track > 0 || opts.new_branch || opts.new_branch_log || opts.merge || opts.force)) die ("--patch is incompatible with all other options"); @@ -708,7 +751,7 @@ int cmd_checkout(int argc, const char **argv, const char *prefix) if (opts.new_orphan_branch) { if (opts.new_branch) - die("--orphan and -b are mutually exclusive"); + die("--orphan and -b|-B are mutually exclusive"); if (opts.track > 0) die("--orphan cannot be used with -t"); opts.new_branch = opts.new_orphan_branch; @@ -857,8 +900,12 @@ no_reference: if (strbuf_check_branch_ref(&buf, opts.new_branch)) die("git checkout: we do not like '%s' as a branch name.", opts.new_branch); - if (!get_sha1(buf.buf, rev)) - die("git checkout: branch %s already exists", opts.new_branch); + if (!get_sha1(buf.buf, rev)) { + opts.branch_exists = 1; + if (!opts.new_branch_force) + die("git checkout: branch %s already exists", + opts.new_branch); + } strbuf_release(&buf); } diff --git a/builtin/clean.c b/builtin/clean.c index fac64e6cd3..c8798f549e 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -10,12 +10,13 @@ #include "cache.h" #include "dir.h" #include "parse-options.h" +#include "string-list.h" #include "quote.h" static int force = -1; /* unset */ static const char *const builtin_clean_usage[] = { - "git clean [-d] [-f] [-n] [-q] [-x | -X] [--] <paths>...", + "git clean [-d] [-f] [-n] [-q] [-e <pattern>] [-x | -X] [--] <paths>...", NULL }; @@ -26,6 +27,13 @@ static int git_clean_config(const char *var, const char *value, void *cb) return git_default_config(var, value, cb); } +static int exclude_cb(const struct option *opt, const char *arg, int unset) +{ + struct string_list *exclude_list = opt->value; + string_list_append(exclude_list, arg); + return 0; +} + int cmd_clean(int argc, const char **argv, const char *prefix) { int i; @@ -36,6 +44,7 @@ int cmd_clean(int argc, const char **argv, const char *prefix) struct dir_struct dir; static const char **pathspec; struct strbuf buf = STRBUF_INIT; + struct 
string_list exclude_list = STRING_LIST_INIT_NODUP; const char *qname; char *seen = NULL; struct option options[] = { @@ -44,6 +53,8 @@ int cmd_clean(int argc, const char **argv, const char *prefix) OPT_BOOLEAN('f', "force", &force, "force"), OPT_BOOLEAN('d', NULL, &remove_directories, "remove whole directories"), + { OPTION_CALLBACK, 'e', "exclude", &exclude_list, "pattern", + "exclude <pattern>", PARSE_OPT_NONEG, exclude_cb }, OPT_BOOLEAN('x', NULL, &ignored, "remove ignored files, too"), OPT_BOOLEAN('X', NULL, &ignored_only, "remove only ignored files"), @@ -81,6 +92,9 @@ int cmd_clean(int argc, const char **argv, const char *prefix) if (!ignored) setup_standard_excludes(&dir); + for (i = 0; i < exclude_list.nr; i++) + add_exclude(exclude_list.items[i].string, "", 0, dir.exclude_list); + pathspec = get_pathspec(prefix, argv); fill_directory(&dir, pathspec); @@ -167,5 +181,6 @@ int cmd_clean(int argc, const char **argv, const char *prefix) free(seen); strbuf_release(&directory); + string_list_clear(&exclude_list, 0); return (errors != 0); } diff --git a/builtin/clone.c b/builtin/clone.c index efb1e6faa5..19ed64041d 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -361,7 +361,7 @@ static void write_remote_refs(const struct ref *local_refs) int cmd_clone(int argc, const char **argv, const char *prefix) { - int is_bundle = 0; + int is_bundle = 0, is_local; struct stat buf; const char *repo_name, *repo, *work_tree, *git_dir; char *path, *dir; @@ -414,6 +414,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix) repo = xstrdup(make_absolute_path(repo_name)); else repo = repo_name; + is_local = path && !is_bundle; + if (is_local && option_depth) + warning("--depth is ignored in local clones; use file:// instead."); if (argc == 2) dir = xstrdup(argv[1]); @@ -514,7 +517,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) strbuf_reset(&value); - if (path && !is_bundle) { + if (is_local) { refs = clone_local(path, git_dir); mapped_refs = 
wanted_peer_refs(refs, refspec); } else { diff --git a/builtin/commit.c b/builtin/commit.c index c4a577d5c5..66fdd22024 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -25,6 +25,7 @@ #include "rerere.h" #include "unpack-trees.h" #include "quote.h" +#include "submodule.h" static const char * const builtin_commit_usage[] = { "git commit [options] [--] <filepattern>...", @@ -1073,6 +1074,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) status_format = STATUS_FORMAT_PORCELAIN; wt_status_prepare(&s); + gitmodules_config(); git_config(git_status_config, &s); in_merge = file_exists(git_path("MERGE_HEAD")); argc = parse_options(argc, argv, prefix, diff --git a/builtin/config.c b/builtin/config.c index f3d1660d02..ca4a0db4a7 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -20,7 +20,7 @@ static char delim = '='; static char key_delim = ' '; static char term = '\n'; -static int use_global_config, use_system_config; +static int use_global_config, use_system_config, use_local_config; static const char *given_config_file; static int actions, types; static const char *get_color_slot, *get_colorbool_slot; @@ -51,6 +51,7 @@ static struct option builtin_config_options[] = { OPT_GROUP("Config file location"), OPT_BOOLEAN(0, "global", &use_global_config, "use global config file"), OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"), + OPT_BOOLEAN(0, "local", &use_local_config, "use repository config file"), OPT_STRING('f', "file", &given_config_file, "FILE", "use given config file"), OPT_GROUP("Action"), OPT_BIT(0, "get", &actions, "get value: name [value-regex]", ACTION_GET), @@ -330,11 +331,10 @@ static int get_colorbool(int print) return get_colorbool_found ? 
0 : 1; } -int cmd_config(int argc, const char **argv, const char *unused_prefix) +int cmd_config(int argc, const char **argv, const char *prefix) { - int nongit; + int nongit = !startup_info->have_repository; char *value; - const char *prefix = setup_git_directory_gently(&nongit); config_exclusive_filename = getenv(CONFIG_ENVIRONMENT); @@ -342,7 +342,7 @@ int cmd_config(int argc, const char **argv, const char *unused_prefix) builtin_config_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (use_global_config + use_system_config + !!given_config_file > 1) { + if (use_global_config + use_system_config + use_local_config + !!given_config_file > 1) { error("only one config file at a time."); usage_with_options(builtin_config_usage, builtin_config_options); } @@ -358,6 +358,8 @@ int cmd_config(int argc, const char **argv, const char *unused_prefix) } else if (use_system_config) config_exclusive_filename = git_etc_gitconfig(); + else if (use_local_config) + config_exclusive_filename = git_pathdup("config"); else if (given_config_file) { if (!is_absolute_path(given_config_file) && prefix) config_exclusive_filename = prefix_filename(prefix, diff --git a/builtin/diff-files.c b/builtin/diff-files.c index 5b64011de8..951c7c8994 100644 --- a/builtin/diff-files.c +++ b/builtin/diff-files.c @@ -8,6 +8,7 @@ #include "commit.h" #include "revision.h" #include "builtin.h" +#include "submodule.h" static const char diff_files_usage[] = "git diff-files [-q] [-0/-1/2/3 |-c|--cc] [<common diff options>] [<path>...]" @@ -20,6 +21,7 @@ int cmd_diff_files(int argc, const char **argv, const char *prefix) unsigned options = 0; init_revisions(&rev, prefix); + gitmodules_config(); git_config(git_diff_basic_config, NULL); /* no "diff" UI options */ rev.abbrev = 0; diff --git a/builtin/diff-index.c b/builtin/diff-index.c index 04837494fe..2eb32bd9da 100644 --- a/builtin/diff-index.c +++ b/builtin/diff-index.c @@ -3,6 +3,7 @@ #include "commit.h" #include "revision.h" #include "builtin.h" +#include 
"submodule.h" static const char diff_cache_usage[] = "git diff-index [-m] [--cached] " @@ -17,6 +18,7 @@ int cmd_diff_index(int argc, const char **argv, const char *prefix) int result; init_revisions(&rev, prefix); + gitmodules_config(); git_config(git_diff_basic_config, NULL); /* no "diff" UI options */ rev.abbrev = 0; diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c index 3c78bda566..0d2a3e9fa2 100644 --- a/builtin/diff-tree.c +++ b/builtin/diff-tree.c @@ -3,6 +3,7 @@ #include "commit.h" #include "log-tree.h" #include "builtin.h" +#include "submodule.h" static struct rev_info log_tree_opt; @@ -112,6 +113,7 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix) int read_stdin = 0; init_revisions(opt, prefix); + gitmodules_config(); git_config(git_diff_basic_config, NULL); /* no "diff" UI options */ opt->abbrev = 0; opt->diff = 1; diff --git a/builtin/diff.c b/builtin/diff.c index 89ae89cde1..a43d326363 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -13,6 +13,7 @@ #include "revision.h" #include "log-tree.h" #include "builtin.h" +#include "submodule.h" struct blobinfo { unsigned char sha1[20]; @@ -279,6 +280,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) */ prefix = setup_git_directory_gently(&nongit); + gitmodules_config(); git_config(git_diff_ui_config, NULL); if (diff_use_color_default == -1) diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 9fe25ff0b3..a9bbf8653d 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -27,6 +27,7 @@ static enum { ABORT, VERBATIM, WARN, STRIP } signed_tag_mode = ABORT; static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ABORT; static int fake_missing_tagger; static int no_data; +static int full_tree; static int parse_opt_signed_tag_mode(const struct option *opt, const char *arg, int unset) @@ -147,10 +148,39 @@ static void handle_object(const unsigned char *sha1) free(buf); } +static int depth_first(const void *a_, const void *b_) +{ + const struct 
diff_filepair *a = *((const struct diff_filepair **)a_); + const struct diff_filepair *b = *((const struct diff_filepair **)b_); + const char *name_a, *name_b; + int len_a, len_b, len; + int cmp; + + name_a = a->one ? a->one->path : a->two->path; + name_b = b->one ? b->one->path : b->two->path; + + len_a = strlen(name_a); + len_b = strlen(name_b); + len = (len_a < len_b) ? len_a : len_b; + + /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */ + cmp = memcmp(name_a, name_b, len); + if (cmp) + return cmp; + return (len_b - len_a); +} + static void show_filemodify(struct diff_queue_struct *q, struct diff_options *options, void *data) { int i; + + /* + * Handle files below a directory first, in case they are all deleted + * and the directory changes to a file or symlink. + */ + qsort(q->queue, q->nr, sizeof(q->queue[0]), depth_first); + for (i = 0; i < q->nr; i++) { struct diff_filespec *ospec = q->queue[i]->one; struct diff_filespec *spec = q->queue[i]->two; @@ -241,7 +271,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) message += 2; if (commit->parents && - get_object_mark(&commit->parents->item->object) != 0) { + get_object_mark(&commit->parents->item->object) != 0 && + !full_tree) { parse_commit(commit->parents->item); diff_tree_sha1(commit->parents->item->tree->object.sha1, commit->tree->object.sha1, "", &rev->diffopt); @@ -281,6 +312,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) i++; } + if (full_tree) + printf("deleteall\n"); log_tree_diff_flush(rev); rev->diffopt.output_format = saved_output_format; @@ -565,8 +598,8 @@ static void import_marks(char *input_file) int cmd_fast_export(int argc, const char **argv, const char *prefix) { struct rev_info revs; - struct object_array commits = { 0, 0, NULL }; - struct string_list extra_refs = { NULL, 0, 0, 0 }; + struct object_array commits = OBJECT_ARRAY_INIT; + struct string_list extra_refs = STRING_LIST_INIT_NODUP; struct commit *commit; char 
*export_filename = NULL, *import_filename = NULL; struct option options[] = { @@ -584,6 +617,8 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) "Import marks from this file"), OPT_BOOLEAN(0, "fake-missing-tagger", &fake_missing_tagger, "Fake a tagger when tags lack one"), + OPT_BOOLEAN(0, "full-tree", &full_tree, + "Output full tree for each commit"), { OPTION_NEGBIT, 0, "data", &no_data, NULL, "Skip output of blob data", PARSE_OPT_NOARG | PARSE_OPT_NEGHELP, NULL, 1 }, @@ -608,6 +643,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) if (import_filename) import_marks(import_filename); + if (import_filename && revs.prune_data) + full_tree = 1; + get_tags_and_duplicates(&revs.pending, &extra_refs); if (prepare_revision_walk(&revs)) diff --git a/builtin/fetch.c b/builtin/fetch.c index 7a53144153..fab3fce512 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -544,40 +544,14 @@ static int will_fetch(struct ref **head, const unsigned char *sha1) return 0; } -struct tag_data { - struct ref **head; - struct ref ***tail; -}; - -static int add_to_tail(struct string_list_item *item, void *cb_data) -{ - struct tag_data *data = (struct tag_data *)cb_data; - struct ref *rm = NULL; - - /* We have already decided to ignore this item */ - if (!item->util) - return 0; - - rm = alloc_ref(item->string); - rm->peer_ref = alloc_ref(item->string); - hashcpy(rm->old_sha1, item->util); - - **data->tail = rm; - *data->tail = &rm->next; - - return 0; -} - static void find_non_local_tags(struct transport *transport, struct ref **head, struct ref ***tail) { - struct string_list existing_refs = { NULL, 0, 0, 0 }; - struct string_list remote_refs = { NULL, 0, 0, 0 }; - struct tag_data data; + struct string_list existing_refs = STRING_LIST_INIT_NODUP; + struct string_list remote_refs = STRING_LIST_INIT_NODUP; const struct ref *ref; struct string_list_item *item = NULL; - data.head = head; data.tail = tail; for_each_ref(add_existing, 
&existing_refs); for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) { @@ -631,10 +605,20 @@ static void find_non_local_tags(struct transport *transport, item->util = NULL; /* - * For all the tags in the remote_refs string list, call - * add_to_tail to add them to the list of refs to be fetched + * For all the tags in the remote_refs string list, + * add them to the list of refs to be fetched */ - for_each_string_list(&remote_refs, add_to_tail, &data); + for_each_string_list_item(item, &remote_refs) { + /* Unless we have already decided to ignore this item... */ + if (item->util) + { + struct ref *rm = alloc_ref(item->string); + rm->peer_ref = alloc_ref(item->string); + hashcpy(rm->old_sha1, item->util); + **tail = rm; + *tail = &rm->next; + } + } string_list_clear(&remote_refs, 0); } @@ -667,7 +651,7 @@ static int truncate_fetch_head(void) static int do_fetch(struct transport *transport, struct refspec *refs, int ref_count) { - struct string_list existing_refs = { NULL, 0, 0, 0 }; + struct string_list existing_refs = STRING_LIST_INIT_NODUP; struct string_list_item *peer_item = NULL; struct ref *ref_map; struct ref *rm; @@ -893,7 +877,7 @@ static int fetch_one(struct remote *remote, int argc, const char **argv) int cmd_fetch(int argc, const char **argv, const char *prefix) { int i; - struct string_list list = { NULL, 0, 0, 0 }; + struct string_list list = STRING_LIST_INIT_NODUP; struct remote *remote; int result = 0; diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c index bc3c5e6d3e..e7e12eea25 100644 --- a/builtin/fmt-merge-msg.c +++ b/builtin/fmt-merge-msg.c @@ -7,7 +7,7 @@ #include "string-list.h" static const char * const fmt_merge_msg_usage[] = { - "git fmt-merge-msg [--log|--no-log] [--file <file>]", + "git fmt-merge-msg [-m <message>] [--log|--no-log] [--file <file>]", NULL }; @@ -38,8 +38,8 @@ void init_src_data(struct src_data *data) data->generic.strdup_strings = 1; } -static struct string_list srcs = { NULL, 0, 0, 1 }; 
-static struct string_list origins = { NULL, 0, 0, 1 }; +static struct string_list srcs = STRING_LIST_INIT_DUP; +static struct string_list origins = STRING_LIST_INIT_DUP; static int handle_line(char *line) { @@ -146,7 +146,7 @@ static void shortlog(const char *name, unsigned char *sha1, int i, count = 0; struct commit *commit; struct object *branch; - struct string_list subjects = { NULL, 0, 0, 1 }; + struct string_list subjects = STRING_LIST_INIT_DUP; int flags = UNINTERESTING | TREESAME | SEEN | SHOWN | ADDED; struct strbuf sb = STRBUF_INIT; @@ -319,11 +319,14 @@ int fmt_merge_msg_shortlog(struct strbuf *in, struct strbuf *out) { int cmd_fmt_merge_msg(int argc, const char **argv, const char *prefix) { const char *inpath = NULL; + const char *message = NULL; struct option options[] = { OPT_BOOLEAN(0, "log", &merge_summary, "populate log with the shortlog"), { OPTION_BOOLEAN, 0, "summary", &merge_summary, NULL, "alias for --log (deprecated)", PARSE_OPT_NOARG | PARSE_OPT_HIDDEN }, + OPT_STRING('m', "message", &message, "text", + "use <text> as start of message"), OPT_FILENAME('F', "file", &inpath, "file to read from"), OPT_END() }; @@ -337,6 +340,12 @@ int cmd_fmt_merge_msg(int argc, const char **argv, const char *prefix) 0); if (argc > 0) usage_with_options(fmt_merge_msg_usage, options); + if (message && !merge_summary) { + char nl = '\n'; + write_in_full(STDOUT_FILENO, message, strlen(message)); + write_in_full(STDOUT_FILENO, &nl, 1); + return 0; + } if (inpath && strcmp(inpath, "-")) { in = fopen(inpath, "r"); @@ -346,7 +355,12 @@ int cmd_fmt_merge_msg(int argc, const char **argv, const char *prefix) if (strbuf_read(&input, fileno(in), 0) < 0) die_errno("could not read input file"); - ret = fmt_merge_msg(merge_summary, &input, &output); + if (message) { + strbuf_addstr(&output, message); + ret = fmt_merge_msg_shortlog(&input, &output); + } else { + ret = fmt_merge_msg(merge_summary, &input, &output); + } if (ret) return ret; write_in_full(STDOUT_FILENO, 
output.buf, output.len); diff --git a/builtin/grep.c b/builtin/grep.c index 597f76bc42..da32f3df34 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -834,12 +834,12 @@ int cmd_grep(int argc, const char **argv, const char *prefix) int external_grep_allowed__ignored; const char *show_in_pager = NULL, *default_pager = "dummy"; struct grep_opt opt; - struct object_array list = { 0, 0, NULL }; + struct object_array list = OBJECT_ARRAY_INIT; const char **paths = NULL; - struct string_list path_list = { NULL, 0, 0, 0 }; + struct string_list path_list = STRING_LIST_INIT_NODUP; int i; int dummy; - int nongit = 0, use_index = 1; + int use_index = 1; struct option options[] = { OPT_BOOLEAN(0, "cached", &cached, "search in index instead of in the work tree"), @@ -930,8 +930,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) OPT_END() }; - prefix = setup_git_directory_gently(&nongit); - /* * 'git grep -h', unlike 'git grep -h <pattern>', is a request * to show usage information and exit. @@ -976,7 +974,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) PARSE_OPT_STOP_AT_NON_OPTION | PARSE_OPT_NO_INTERNAL_HELP); - if (use_index && nongit) + if (use_index && !startup_info->have_repository) /* die the same way as if we did it at the beginning */ setup_git_directory(); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index fad76bf7a8..2e680d7a7a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -886,25 +886,9 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) read_replace_refs = 0; - /* - * We wish to read the repository's config file if any, and - * for that it is necessary to call setup_git_directory_gently(). - * However if the cwd was inside .git/objects/pack/ then we need - * to go back there or all the pack name arguments will be wrong. - * And in that case we cannot rely on any prefix returned by - * setup_git_directory_gently() either. 
- */ - { - char cwd[PATH_MAX+1]; - int nongit; - - if (!getcwd(cwd, sizeof(cwd)-1)) - die("Unable to get current working directory"); - setup_git_directory_gently(&nongit); - git_config(git_index_pack_config, NULL); - if (chdir(cwd)) - die("Cannot come back to cwd"); - } + git_config(git_index_pack_config, NULL); + if (prefix && chdir(prefix)) + die("Cannot come back to cwd"); for (i = 1; i < argc; i++) { const char *arg = argv[i]; diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 1b9b8a8b4a..bb4f612b3d 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -25,6 +25,7 @@ static int show_modified; static int show_killed; static int show_valid_bit; static int line_terminator = '\n'; +static int debug_mode; static const char *prefix; static int max_prefix_len; @@ -162,35 +163,41 @@ static void show_ce_entry(const char *tag, struct cache_entry *ce) ce_stage(ce)); } write_name(ce->name, ce_namelen(ce)); -} - -static int show_one_ru(struct string_list_item *item, void *cbdata) -{ - const char *path = item->string; - struct resolve_undo_info *ui = item->util; - int i, len; - - len = strlen(path); - if (len < max_prefix_len) - return 0; /* outside of the prefix */ - if (!match_pathspec(pathspec, path, len, max_prefix_len, ps_matched)) - return 0; /* uninterested */ - for (i = 0; i < 3; i++) { - if (!ui->mode[i]) - continue; - printf("%s%06o %s %d\t", tag_resolve_undo, ui->mode[i], - find_unique_abbrev(ui->sha1[i], abbrev), - i + 1); - write_name(path, len); + if (debug_mode) { + printf(" ctime: %d:%d\n", ce->ce_ctime.sec, ce->ce_ctime.nsec); + printf(" mtime: %d:%d\n", ce->ce_mtime.sec, ce->ce_mtime.nsec); + printf(" dev: %d\tino: %d\n", ce->ce_dev, ce->ce_ino); + printf(" uid: %d\tgid: %d\n", ce->ce_uid, ce->ce_gid); + printf(" size: %d\tflags: %x\n", ce->ce_size, ce->ce_flags); } - return 0; } static void show_ru_info(void) { + struct string_list_item *item; + if (!the_index.resolve_undo) return; - for_each_string_list(the_index.resolve_undo, show_one_ru, 
NULL); + + for_each_string_list_item(item, the_index.resolve_undo) { + const char *path = item->string; + struct resolve_undo_info *ui = item->util; + int i, len; + + len = strlen(path); + if (len < max_prefix_len) + continue; /* outside of the prefix */ + if (!match_pathspec(pathspec, path, len, max_prefix_len, ps_matched)) + continue; /* uninterested */ + for (i = 0; i < 3; i++) { + if (!ui->mode[i]) + continue; + printf("%s%06o %s %d\t", tag_resolve_undo, ui->mode[i], + find_unique_abbrev(ui->sha1[i], abbrev), + i + 1); + write_name(path, len); + } + } } static void show_files(struct dir_struct *dir) @@ -519,6 +526,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix) OPT_STRING(0, "with-tree", &with_tree, "tree-ish", "pretend that paths removed since <tree-ish> are still present"), OPT__ABBREV(&abbrev), + OPT_BOOLEAN(0, "debug", &debug_mode, "show debugging data"), OPT_END() }; diff --git a/builtin/ls-remote.c b/builtin/ls-remote.c index 34480cfad6..97eed4012b 100644 --- a/builtin/ls-remote.c +++ b/builtin/ls-remote.c @@ -32,7 +32,6 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix) { int i; const char *dest = NULL; - int nongit; unsigned flags = 0; int quiet = 0; const char *uploadpack = NULL; @@ -42,8 +41,6 @@ int cmd_ls_remote(int argc, const char **argv, const char *prefix) struct transport *transport; const struct ref *ref; - setup_git_directory_gently(&nongit); - for (i = 1; i < argc; i++) { const char *arg = argv[i]; diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c index e4560da191..99654d0222 100644 --- a/builtin/mailsplit.c +++ b/builtin/mailsplit.c @@ -137,7 +137,7 @@ static int split_maildir(const char *maildir, const char *dir, char name[PATH_MAX]; int ret = -1; int i; - struct string_list list = {NULL, 0, 0, 1}; + struct string_list list = STRING_LIST_INIT_DUP; if (populate_maildir_list(&list, maildir) < 0) goto out; diff --git a/builtin/merge-base.c b/builtin/merge-base.c index 54e7ec2237..96dd160731 
100644 --- a/builtin/merge-base.c +++ b/builtin/merge-base.c @@ -23,7 +23,8 @@ static int show_merge_base(struct commit **rev, int rev_nr, int show_all) } static const char * const merge_base_usage[] = { - "git merge-base [-a|--all] <commit> <commit>...", + "git merge-base [-a|--all] [--octopus] <commit> <commit>...", + "git merge-base --independent <commit>...", NULL }; @@ -41,21 +42,58 @@ static struct commit *get_commit_reference(const char *arg) return r; } +static int handle_octopus(int count, const char **args, int reduce, int show_all) +{ + struct commit_list *revs = NULL; + struct commit_list *result; + int i; + + if (reduce) + show_all = 1; + + for (i = count - 1; i >= 0; i--) + commit_list_insert(get_commit_reference(args[i]), &revs); + + result = reduce ? reduce_heads(revs) : get_octopus_merge_bases(revs); + + if (!result) + return 1; + + while (result) { + printf("%s\n", sha1_to_hex(result->item->object.sha1)); + if (!show_all) + return 0; + result = result->next; + } + + return 0; +} + int cmd_merge_base(int argc, const char **argv, const char *prefix) { struct commit **rev; int rev_nr = 0; int show_all = 0; + int octopus = 0; + int reduce = 0; struct option options[] = { - OPT_BOOLEAN('a', "all", &show_all, "outputs all common ancestors"), + OPT_BOOLEAN('a', "all", &show_all, "output all common ancestors"), + OPT_BOOLEAN(0, "octopus", &octopus, "find ancestors for a single n-way merge"), + OPT_BOOLEAN(0, "independent", &reduce, "list revs not reachable from others"), OPT_END() }; git_config(git_default_config, NULL); argc = parse_options(argc, argv, prefix, options, merge_base_usage, 0); - if (argc < 2) + if (!octopus && !reduce && argc < 2) usage_with_options(merge_base_usage, options); + if (reduce && (show_all || octopus)) + die("--independent cannot be used with other options"); + + if (octopus || reduce) + return handle_octopus(argc, argv, reduce, show_all); + rev = xmalloc(argc * sizeof(*rev)); while (argc-- > 0) rev[rev_nr++] = 
get_commit_reference(*argv++); diff --git a/builtin/merge-file.c b/builtin/merge-file.c index b8e9e5ba01..b6664d49be 100644 --- a/builtin/merge-file.c +++ b/builtin/merge-file.c @@ -28,7 +28,6 @@ int cmd_merge_file(int argc, const char **argv, const char *prefix) xmparam_t xmp = {{0}}; int ret = 0, i = 0, to_stdout = 0; int quiet = 0; - int nongit; struct option options[] = { OPT_BOOLEAN('p', "stdout", &to_stdout, "send results to standard output"), OPT_SET_INT(0, "diff3", &xmp.style, "use a diff3 based merge", XDL_MERGE_DIFF3), @@ -50,8 +49,7 @@ int cmd_merge_file(int argc, const char **argv, const char *prefix) xmp.style = 0; xmp.favor = 0; - prefix = setup_git_directory_gently(&nongit); - if (!nongit) { + if (startup_info->have_repository) { /* Read the configuration file */ git_config(git_xmerge_config, NULL); if (0 <= git_xmerge_style) diff --git a/builtin/merge-recursive.c b/builtin/merge-recursive.c index 3d00adbfc7..78b9db76a0 100644 --- a/builtin/merge-recursive.c +++ b/builtin/merge-recursive.c @@ -48,6 +48,10 @@ int cmd_merge_recursive(int argc, const char **argv, const char *prefix) o.subtree_shift = ""; else if (!prefixcmp(arg+2, "subtree=")) o.subtree_shift = arg + 10; + else if (!strcmp(arg+2, "renormalize")) + o.renormalize = 1; + else if (!strcmp(arg+2, "no-renormalize")) + o.renormalize = 0; else die("Unknown option %s", arg); continue; diff --git a/builtin/merge.c b/builtin/merge.c index 37ce4f589f..5f65c0c8a6 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -54,6 +54,7 @@ static size_t use_strategies_nr, use_strategies_alloc; static const char **xopts; static size_t xopts_nr, xopts_alloc; static const char *branch; +static int option_renormalize; static int verbosity; static int allow_rerere_auto; @@ -131,6 +132,7 @@ static struct strategy *get_strategy(const char *name) ret = xcalloc(1, sizeof(struct strategy)); ret->name = xstrdup(name); + ret->attr = NO_TRIVIAL; return ret; } @@ -437,7 +439,7 @@ static void merge_name(const char *remote, 
struct strbuf *msg) strbuf_addstr(&truname, "refs/heads/"); strbuf_addstr(&truname, remote); strbuf_setlen(&truname, truname.len - len); - if (resolve_ref(truname.buf, buf_sha, 0, NULL)) { + if (resolve_ref(truname.buf, buf_sha, 1, NULL)) { strbuf_addf(msg, "%s\t\tbranch '%s'%s of .\n", sha1_to_hex(remote_head->sha1), @@ -486,7 +488,8 @@ static int git_merge_config(const char *k, const char *v, void *cb) buf = xstrdup(v); argc = split_cmdline(buf, &argv); if (argc < 0) - die("Bad branch.%s.mergeoptions string", branch); + die("Bad branch.%s.mergeoptions string: %s", branch, + split_cmdline_strerror(argc)); argv = xrealloc(argv, sizeof(*argv) * (argc + 2)); memmove(argv + 1, argv, sizeof(*argv) * (argc + 1)); argc++; @@ -503,6 +506,8 @@ static int git_merge_config(const char *k, const char *v, void *cb) return git_config_string(&pull_octopus, k, v); else if (!strcmp(k, "merge.log") || !strcmp(k, "merge.summary")) option_log = git_config_bool(k, v); + else if (!strcmp(k, "merge.renormalize")) + option_renormalize = git_config_bool(k, v); return git_diff_ui_config(k, v, cb); } @@ -624,6 +629,11 @@ static int try_merge_strategy(const char *strategy, struct commit_list *common, if (!strcmp(strategy, "subtree")) o.subtree_shift = ""; + o.renormalize = option_renormalize; + + /* + * NEEDSWORK: merge with table in builtin/merge-recursive + */ for (x = 0; x < xopts_nr; x++) { if (!strcmp(xopts[x], "ours")) o.recursive_variant = MERGE_RECURSIVE_OURS; @@ -633,6 +643,10 @@ static int try_merge_strategy(const char *strategy, struct commit_list *common, o.subtree_shift = ""; else if (!prefixcmp(xopts[x], "subtree=")) o.subtree_shift = xopts[x]+8; + else if (!strcmp(xopts[x], "renormalize")) + o.renormalize = 1; + else if (!strcmp(xopts[x], "no-renormalize")) + o.renormalize = 0; else die("Unknown option for merge-recursive: -X%s", xopts[x]); } @@ -704,7 +718,7 @@ int checkout_fast_forward(const unsigned char *head, const unsigned char *remote opts.verbose_update = 1; opts.merge 
= 1; opts.fn = twoway_merge; - opts.msgs = get_porcelain_error_msgs(); + setup_unpack_trees_porcelain(&opts, "merge"); trees[nr_trees] = parse_tree_indirect(head); if (!trees[nr_trees++]) @@ -816,7 +830,7 @@ static int finish_automerge(struct commit_list *common, return 0; } -static int suggest_conflicts(void) +static int suggest_conflicts(int renormalizing) { FILE *fp; int pos; @@ -1301,5 +1315,5 @@ int cmd_merge(int argc, const char **argv, const char *prefix) "stopped before committing as requested\n"); return 0; } else - return suggest_conflicts(); + return suggest_conflicts(option_renormalize); } diff --git a/builtin/mv.c b/builtin/mv.c index 38574b89f7..cdbb09473c 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -63,7 +63,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix) const char **source, **destination, **dest_path; enum update_mode { BOTH = 0, WORKING_DIRECTORY, INDEX } *modes; struct stat st; - struct string_list src_for_dst = {NULL, 0, 0, 0}; + struct string_list src_for_dst = STRING_LIST_INIT_NODUP; git_config(git_default_config, NULL); diff --git a/builtin/name-rev.c b/builtin/name-rev.c index 06a38ac8c1..31f5c1c971 100644 --- a/builtin/name-rev.c +++ b/builtin/name-rev.c @@ -220,7 +220,7 @@ static void name_rev_line(char *p, struct name_ref_data *data) int cmd_name_rev(int argc, const char **argv, const char *prefix) { - struct object_array revs = { 0, 0, NULL }; + struct object_array revs = OBJECT_ARRAY_INIT; int all = 0, transform_stdin = 0, allow_undefined = 1, always = 0; struct name_ref_data data = { 0, 0, NULL }; struct option opts[] = { diff --git a/builtin/push.c b/builtin/push.c index 69bc2f27ac..e655eb7695 100644 --- a/builtin/push.c +++ b/builtin/push.c @@ -22,13 +22,13 @@ static int progress; static const char **refspec; static int refspec_nr; +static int refspec_alloc; static void add_refspec(const char *ref) { - int nr = refspec_nr + 1; - refspec = xrealloc(refspec, nr * sizeof(char *)); - refspec[nr-1] = ref; - refspec_nr 
= nr; + refspec_nr++; + ALLOC_GROW(refspec, refspec_nr, refspec_alloc); + refspec[refspec_nr-1] = ref; } static void set_refspecs(const char **refs, int nr) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index d634b5a3d5..760817dbd7 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -530,7 +530,7 @@ static void check_aliased_update(struct command *cmd, struct string_list *list) static void check_aliased_updates(struct command *commands) { struct command *cmd; - struct string_list ref_list = { NULL, 0, 0, 0 }; + struct string_list ref_list = STRING_LIST_INIT_NODUP; for (cmd = commands; cmd; cmd = cmd->next) { struct string_list_item *item = diff --git a/builtin/remote.c b/builtin/remote.c index 6699bc5712..48e0a6bf26 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -134,7 +134,7 @@ static int add_branch(const char *key, const char *branchname, static int add(int argc, const char **argv) { int fetch = 0, mirror = 0, fetch_tags = TAGS_DEFAULT; - struct string_list track = { NULL, 0, 0 }; + struct string_list track = STRING_LIST_INIT_NODUP; const char *master = NULL; struct remote *remote; struct strbuf buf = STRBUF_INIT, buf2 = STRBUF_INIT; @@ -596,7 +596,7 @@ static int mv(int argc, const char **argv) }; struct remote *oldremote, *newremote; struct strbuf buf = STRBUF_INIT, buf2 = STRBUF_INIT, buf3 = STRBUF_INIT; - struct string_list remote_branches = { NULL, 0, 0, 0 }; + struct string_list remote_branches = STRING_LIST_INIT_NODUP; struct rename_info rename; int i; @@ -734,8 +734,8 @@ static int rm(int argc, const char **argv) struct remote *remote; struct strbuf buf = STRBUF_INIT; struct known_remotes known_remotes = { NULL, NULL }; - struct string_list branches = { NULL, 0, 0, 1 }; - struct string_list skipped = { NULL, 0, 0, 1 }; + struct string_list branches = STRING_LIST_INIT_DUP; + struct string_list skipped = STRING_LIST_INIT_DUP; struct branches_for_remote cb_data; int i, result; @@ -1044,7 +1044,7 @@ static int show(int 
argc, const char **argv) OPT_END() }; struct ref_states states; - struct string_list info_list = { NULL, 0, 0, 0 }; + struct string_list info_list = STRING_LIST_INIT_NODUP; struct show_info info; argc = parse_options(argc, argv, NULL, options, builtin_remote_show_usage, @@ -1483,7 +1483,7 @@ static int get_one_entry(struct remote *remote, void *priv) static int show_all(void) { - struct string_list list = { NULL, 0, 0 }; + struct string_list list = STRING_LIST_INIT_NODUP; int result; list.strdup_strings = 1; diff --git a/builtin/rerere.c b/builtin/rerere.c index 0c7202eb9e..642bf35587 100644 --- a/builtin/rerere.c +++ b/builtin/rerere.c @@ -1,13 +1,16 @@ #include "builtin.h" #include "cache.h" #include "dir.h" +#include "parse-options.h" #include "string-list.h" #include "rerere.h" #include "xdiff/xdiff.h" #include "xdiff-interface.h" -static const char git_rerere_usage[] = -"git rerere [clear | status | diff | gc]"; +static const char * const rerere_usage[] = { + "git rerere [clear | status | diff | gc]", + NULL, +}; /* these values are days */ static int cutoff_noresolve = 15; @@ -46,7 +49,7 @@ static int git_rerere_gc_config(const char *var, const char *value, void *cb) static void garbage_collect(struct string_list *rr) { - struct string_list to_remove = { NULL, 0, 0, 1 }; + struct string_list to_remove = STRING_LIST_INIT_DUP; DIR *dir; struct dirent *e; int i, cutoff; @@ -113,26 +116,27 @@ static int diff_two(const char *file1, const char *label1, int cmd_rerere(int argc, const char **argv, const char *prefix) { - struct string_list merge_rr = { NULL, 0, 0, 1 }; - int i, fd, flags = 0; - - if (2 < argc) { - if (!strcmp(argv[1], "-h")) - usage(git_rerere_usage); - if (!strcmp(argv[1], "--rerere-autoupdate")) - flags = RERERE_AUTOUPDATE; - else if (!strcmp(argv[1], "--no-rerere-autoupdate")) - flags = RERERE_NOAUTOUPDATE; - if (flags) { - argc--; - argv++; - } - } - if (argc < 2) + struct string_list merge_rr = STRING_LIST_INIT_DUP; + int i, fd, autoupdate = -1, 
flags = 0; + + struct option options[] = { + OPT_SET_INT(0, "rerere-autoupdate", &autoupdate, + "register clean resolutions in index", 1), + OPT_END(), + }; + + argc = parse_options(argc, argv, prefix, options, rerere_usage, 0); + + if (autoupdate == 1) + flags = RERERE_AUTOUPDATE; + if (autoupdate == 0) + flags = RERERE_NOAUTOUPDATE; + + if (argc < 1) return rerere(flags); - if (!strcmp(argv[1], "forget")) { - const char **pathspec = get_pathspec(prefix, argv + 2); + if (!strcmp(argv[0], "forget")) { + const char **pathspec = get_pathspec(prefix, argv + 1); return rerere_forget(pathspec); } @@ -140,26 +144,26 @@ int cmd_rerere(int argc, const char **argv, const char *prefix) if (fd < 0) return 0; - if (!strcmp(argv[1], "clear")) { + if (!strcmp(argv[0], "clear")) { for (i = 0; i < merge_rr.nr; i++) { const char *name = (const char *)merge_rr.items[i].util; if (!has_rerere_resolution(name)) unlink_rr_item(name); } unlink_or_warn(git_path("MERGE_RR")); - } else if (!strcmp(argv[1], "gc")) + } else if (!strcmp(argv[0], "gc")) garbage_collect(&merge_rr); - else if (!strcmp(argv[1], "status")) + else if (!strcmp(argv[0], "status")) for (i = 0; i < merge_rr.nr; i++) printf("%s\n", merge_rr.items[i].string); - else if (!strcmp(argv[1], "diff")) + else if (!strcmp(argv[0], "diff")) for (i = 0; i < merge_rr.nr; i++) { const char *path = merge_rr.items[i].string; const char *name = (const char *)merge_rr.items[i].util; diff_two(rerere_path(name, "preimage"), path, path, path); } else - usage(git_rerere_usage); + usage_with_options(rerere_usage, options); string_list_clear(&merge_rr, 1); return 0; diff --git a/builtin/reset.c b/builtin/reset.c index 1283068fd2..0037be4693 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -318,7 +318,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) * affecting the working tree nor HEAD. 
*/ if (i < argc) { if (reset_type == MIXED) - warning("--mixed option is deprecated with paths."); + warning("--mixed with paths is deprecated; use 'git reset -- <paths>' instead."); else if (reset_type != NONE) die("Cannot do %s reset with paths.", reset_type_names[reset_type]); diff --git a/builtin/revert.c b/builtin/revert.c index 54d13cf5ef..4b47ace36b 100644 --- a/builtin/revert.c +++ b/builtin/revert.c @@ -102,9 +102,9 @@ struct commit_message { static int get_message(const char *raw_message, struct commit_message *out) { const char *encoding; - const char *p, *abbrev, *eol; + const char *abbrev, *subject; + int abbrev_len, subject_len; char *q; - int abbrev_len, oneline_len; if (!raw_message) return -1; @@ -125,27 +125,17 @@ static int get_message(const char *raw_message, struct commit_message *out) abbrev = find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV); abbrev_len = strlen(abbrev); - /* Find beginning and end of commit subject. */ - p = out->message; - while (*p && (*p != '\n' || p[1] != '\n')) - p++; - if (*p) { - p += 2; - for (eol = p + 1; *eol && *eol != '\n'; eol++) - ; /* do nothing */ - } else - eol = p; - oneline_len = eol - p; + subject_len = find_commit_subject(out->message, &subject); out->parent_label = xmalloc(strlen("parent of ") + abbrev_len + - strlen("... ") + oneline_len + 1); + strlen("... ") + subject_len + 1); q = out->parent_label; q = mempcpy(q, "parent of ", strlen("parent of ")); out->label = q; q = mempcpy(q, abbrev, abbrev_len); q = mempcpy(q, "... ", strlen("... ")); out->subject = q; - q = mempcpy(q, p, oneline_len); + q = mempcpy(q, subject, subject_len); *q = '\0'; return 0; } @@ -241,27 +231,30 @@ static void set_author_ident_env(const char *message) sha1_to_hex(commit->object.sha1)); } -static char *help_msg(void) +static void advise(const char *advice, ...) 
{ - struct strbuf helpbuf = STRBUF_INIT; - char *msg = getenv("GIT_CHERRY_PICK_HELP"); + va_list params; - if (msg) - return msg; + va_start(params, advice); + vreportf("hint: ", advice, params); + va_end(params); +} - strbuf_addstr(&helpbuf, " After resolving the conflicts,\n" - "mark the corrected paths with 'git add <paths>' or 'git rm <paths>'\n" - "and commit the result"); +static void print_advice(void) +{ + char *msg = getenv("GIT_CHERRY_PICK_HELP"); - if (action == CHERRY_PICK) { - strbuf_addf(&helpbuf, " with: \n" - "\n" - " git commit -c %s\n", - sha1_to_hex(commit->object.sha1)); + if (msg) { + fprintf(stderr, "%s\n", msg); + return; } - else - strbuf_addch(&helpbuf, '.'); - return strbuf_detach(&helpbuf, NULL); + + advise("after resolving the conflicts, mark the corrected paths"); + advise("with 'git add <paths>' or 'git rm <paths>'"); + + if (action == CHERRY_PICK) + advise("and commit the result with 'git commit -c %s'", + find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV)); } static void write_message(struct strbuf *msgbuf, const char *filename) @@ -311,10 +304,9 @@ static int fast_forward_to(const unsigned char *to, const unsigned char *from) return write_ref_sha1(ref_lock, to, "cherry-pick"); } -static void do_recursive_merge(struct commit *base, struct commit *next, - const char *base_label, const char *next_label, - unsigned char *head, struct strbuf *msgbuf, - char *defmsg) +static int do_recursive_merge(struct commit *base, struct commit *next, + const char *base_label, const char *next_label, + unsigned char *head, struct strbuf *msgbuf) { struct merge_options o; struct tree *result, *next_tree, *base_tree, *head_tree; @@ -324,6 +316,13 @@ static void do_recursive_merge(struct commit *base, struct commit *next, index_fd = hold_locked_index(&index_lock, 1); read_cache(); + + /* + * NEEDSWORK: cherry-picking between branches with + * different end-of-line normalization is a pain; + * plumb in an option to set o.renormalize? 
+ * (or better: arbitrary -X options) + */ init_merge_options(&o); o.ancestor = base ? base_label : "(empty tree)"; o.branch1 = "HEAD"; @@ -357,14 +356,35 @@ static void do_recursive_merge(struct commit *base, struct commit *next, i++; } } - write_message(msgbuf, defmsg); - fprintf(stderr, "Automatic %s failed.%s\n", - me, help_msg()); - rerere(allow_rerere_auto); - exit(1); } - write_message(msgbuf, defmsg); - fprintf(stderr, "Finished one %s.\n", me); + + return !clean; +} + +/* + * If we are cherry-pick, and if the merge did not result in + * hand-editing, we will hit this commit and inherit the original + * author date and name. + * If we are revert, or if our cherry-pick results in a hand merge, + * we had better say that the current user is responsible for that. + */ +static int run_git_commit(const char *defmsg) +{ + /* 6 is max possible length of our args array including NULL */ + const char *args[6]; + int i = 0; + + args[i++] = "commit"; + args[i++] = "-n"; + if (signoff) + args[i++] = "-s"; + if (!edit) { + args[i++] = "-F"; + args[i++] = defmsg; + } + args[i] = NULL; + + return run_command_v_opt(args, RUN_GIT_CMD); } static int do_pick_commit(void) @@ -375,6 +395,7 @@ static int do_pick_commit(void) struct commit_message msg = { NULL, NULL, NULL, NULL, NULL }; char *defmsg = NULL; struct strbuf msgbuf = STRBUF_INIT; + int res; if (no_commit) { /* @@ -470,62 +491,40 @@ static int do_pick_commit(void) } } - if (!strategy || !strcmp(strategy, "recursive") || action == REVERT) - do_recursive_merge(base, next, base_label, next_label, - head, &msgbuf, defmsg); - else { - int res; + if (!strategy || !strcmp(strategy, "recursive") || action == REVERT) { + res = do_recursive_merge(base, next, base_label, next_label, + head, &msgbuf); + write_message(&msgbuf, defmsg); + } else { struct commit_list *common = NULL; struct commit_list *remotes = NULL; + write_message(&msgbuf, defmsg); + commit_list_insert(base, &common); commit_list_insert(next, &remotes); res = 
try_merge_command(strategy, common, sha1_to_hex(head), remotes); free_commit_list(common); free_commit_list(remotes); - if (res) { - fprintf(stderr, "Automatic %s with strategy %s failed.%s\n", - me, strategy, help_msg()); - rerere(allow_rerere_auto); - exit(1); - } } - free_message(&msg); - - /* - * If we are cherry-pick, and if the merge did not result in - * hand-editing, we will hit this commit and inherit the original - * author date and name. - * If we are revert, or if our cherry-pick results in a hand merge, - * we had better say that the current user is responsible for that. - */ - - if (!no_commit) { - /* 6 is max possible length of our args array including NULL */ - const char *args[6]; - int res; - int i = 0; - - args[i++] = "commit"; - args[i++] = "-n"; - if (signoff) - args[i++] = "-s"; - if (!edit) { - args[i++] = "-F"; - args[i++] = defmsg; - } - args[i] = NULL; - res = run_command_v_opt(args, RUN_GIT_CMD); - free(defmsg); - - return res; + if (res) { + error("could not %s %s... %s", + action == REVERT ? 
"revert" : "apply", + find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV), + msg.subject); + print_advice(); + rerere(allow_rerere_auto); + } else { + if (!no_commit) + res = run_git_commit(defmsg); } + free_message(&msg); free(defmsg); - return 0; + return res; } static void prepare_revs(struct rev_info *revs) diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 0a9681ba7e..2135b0dde1 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -249,7 +249,7 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) { static struct shortlog log; static struct rev_info rev; - int nongit; + int nongit = !startup_info->have_repository; static const struct option options[] = { OPT_BOOLEAN('n', "numbered", &log.sort_by_number, @@ -265,7 +265,6 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) struct parse_opt_ctx_t ctx; - prefix = setup_git_directory_gently(&nongit); git_config(git_default_config, NULL); shortlog_init(&log); init_revisions(&rev, prefix); diff --git a/builtin/show-ref.c b/builtin/show-ref.c index 0b2a9ad1a9..be9b512eeb 100644 --- a/builtin/show-ref.c +++ b/builtin/show-ref.c @@ -120,7 +120,7 @@ static int add_existing(const char *refname, const unsigned char *sha1, int flag */ static int exclude_existing(const char *match) { - static struct string_list existing_refs = { NULL, 0, 0, 0 }; + static struct string_list existing_refs = STRING_LIST_INIT_NODUP; char buf[1024]; int matchlen = match ? 
strlen(match) : 0; diff --git a/builtin/var.c b/builtin/var.c index 70fdb4dec7..0744bb8318 100644 --- a/builtin/var.c +++ b/builtin/var.c @@ -74,14 +74,9 @@ static int show_config(const char *var, const char *value, void *cb) int cmd_var(int argc, const char **argv, const char *prefix) { - const char *val; - int nongit; - if (argc != 2) { + const char *val = NULL; + if (argc != 2) usage(var_usage); - } - - setup_git_directory_gently(&nongit); - val = NULL; if (strcmp(argv[1], "-l") == 0) { git_config(show_config, NULL); @@ -179,8 +179,7 @@ struct cache_entry { #define CE_UNHASHED (0x200000) #define CE_CONFLICTED (0x800000) -/* Only remove in work directory, not index */ -#define CE_WT_REMOVE (0x400000) +#define CE_WT_REMOVE (0x400000) /* remove in work directory */ #define CE_UNPACKED (0x1000000) @@ -641,6 +640,9 @@ extern char *git_pathdup(const char *fmt, ...) /* Return a statically allocated filename matching the sha1 signature */ extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); extern char *git_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +extern char *git_path_submodule(const char *path, const char *fmt, ...) 
+ __attribute__((format (printf, 2, 3))); + extern char *sha1_file_name(const unsigned char *sha1); extern char *sha1_pack_name(const unsigned char *sha1); extern char *sha1_pack_index_name(const unsigned char *sha1); @@ -811,6 +813,7 @@ const char *show_date_relative(unsigned long time, int tz, char *timebuf, size_t timebuf_size); int parse_date(const char *date, char *buf, int bufsize); +int parse_date_basic(const char *date, unsigned long *timestamp, int *offset); void datestamp(char *buf, int bufsize); #define approxidate(s) approxidate_careful((s), NULL) unsigned long approxidate_careful(const char *, int *); @@ -1054,6 +1057,7 @@ extern void trace_argv_printf(const char **argv, const char *format, ...); extern int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe); extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst); +extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst); /* add */ /* @@ -1096,6 +1100,14 @@ void overlay_tree_on_cache(const char *tree_name, const char *prefix); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); +/* Takes a negative value returned by split_cmdline */ +const char *split_cmdline_strerror(int cmdline_errno); + +/* git.c */ +struct startup_info { + int have_repository; +}; +extern struct startup_info *startup_info; /* builtin/merge.c */ int checkout_fast_forward(const unsigned char *from, const unsigned char *to); @@ -315,6 +315,25 @@ int parse_commit(struct commit *item) return ret; } +int find_commit_subject(const char *commit_buffer, const char **subject) +{ + const char *eol; + const char *p = commit_buffer; + + while (*p && (*p != '\n' || p[1] != '\n')) + p++; + if (*p) { + p += 2; + for (eol = p; *eol && *eol != '\n'; eol++) + ; /* do nothing */ + } else + eol = p; + + *subject = p; + + return eol - p; +} + struct commit_list 
*commit_list_insert(struct commit *item, struct commit_list **list_p) { struct commit_list *new_list = xmalloc(sizeof(struct commit_list)); @@ -41,6 +41,9 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size); int parse_commit(struct commit *item); +/* Find beginning and length of commit subject. */ +int find_commit_subject(const char *commit_buffer, const char **subject); + struct commit_list * commit_list_insert(struct commit *item, struct commit_list **list_p); unsigned commit_list_count(const struct commit_list *l); struct commit_list * insert_by_date(struct commit *item, struct commit_list **list); diff --git a/compat/regex/regcomp.c b/compat/regex/regcomp.c new file mode 100644 index 0000000000..8c96ed942c --- /dev/null +++ b/compat/regex/regcomp.c @@ -0,0 +1,3884 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. 
*/ + +static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, + size_t length, reg_syntax_t syntax); +static void re_compile_fastmap_iter (regex_t *bufp, + const re_dfastate_t *init_state, + char *fastmap); +static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); +#ifdef RE_ENABLE_I18N +static void free_charset (re_charset_t *cset); +#endif /* RE_ENABLE_I18N */ +static void free_workarea_compile (regex_t *preg); +static reg_errcode_t create_initial_state (re_dfa_t *dfa); +#ifdef RE_ENABLE_I18N +static void optimize_utf8 (re_dfa_t *dfa); +#endif +static reg_errcode_t analyze (regex_t *preg); +static reg_errcode_t preorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t postorder (bin_tree_t *root, + reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra); +static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); +static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); +static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, + bin_tree_t *node); +static reg_errcode_t calc_first (void *extra, bin_tree_t *node); +static reg_errcode_t calc_next (void *extra, bin_tree_t *node); +static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); +static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); +static int search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint); +static reg_errcode_t calc_eclosure (re_dfa_t *dfa); +static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, + int node, int root); +static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); +static int fetch_number (re_string_t *input, re_token_t *token, + reg_syntax_t syntax); +static int peek_token (re_token_t *token, re_string_t *input, + reg_syntax_t syntax) internal_function; +static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t 
*parse_reg_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, + re_token_t *token, reg_syntax_t syntax, + int nest, reg_errcode_t *err); +static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, + re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err); +static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, + reg_errcode_t *err); +static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token, int token_len, + re_dfa_t *dfa, + reg_syntax_t syntax, + int accept_hyphen); +static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, + re_string_t *regexp, + re_token_t *token); +#ifdef RE_ENABLE_I18N +static reg_errcode_t build_equiv_class (bitset_t sbcset, + re_charset_t *mbcset, + int *equiv_class_alloc, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + re_charset_t *mbcset, + int *char_class_alloc, + const char *class_name, + reg_syntax_t syntax); +#else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_equiv_class (bitset_t sbcset, + const unsigned char *name); +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, + const char *class_name, + reg_syntax_t syntax); +#endif /* not RE_ENABLE_I18N */ +static bin_tree_t *build_charclass_op (re_dfa_t *dfa, + RE_TRANSLATE_TYPE trans, + const char *class_name, + const char *extra, + int non_match, reg_errcode_t *err); +static bin_tree_t *create_tree 
(re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + re_token_type_t type); +static bin_tree_t *create_token_tree (re_dfa_t *dfa, + bin_tree_t *left, bin_tree_t *right, + const re_token_t *token); +static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); +static void free_token (re_token_t *node); +static reg_errcode_t free_tree (void *extra, bin_tree_t *node); +static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? */ + +const char __re_error_msgid[] attribute_hidden = + { +#define REG_NOERROR_IDX 0 + gettext_noop ("Success") /* REG_NOERROR */ + "\0" +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") + gettext_noop ("No match") /* REG_NOMATCH */ + "\0" +#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") + gettext_noop ("Invalid regular expression") /* REG_BADPAT */ + "\0" +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") + gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ + "\0" +#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") + gettext_noop ("Invalid character class name") /* REG_ECTYPE */ + "\0" +#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") + gettext_noop ("Trailing backslash") /* REG_EESCAPE */ + "\0" +#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") + gettext_noop ("Invalid back reference") /* REG_ESUBREG */ + "\0" +#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") + gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + "\0" +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") + gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ + "\0" +#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") + 
gettext_noop ("Unmatched \\{") /* REG_EBRACE */ + "\0" +#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") + gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ + "\0" +#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") + gettext_noop ("Invalid range end") /* REG_ERANGE */ + "\0" +#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") + gettext_noop ("Memory exhausted") /* REG_ESPACE */ + "\0" +#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") + gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ + "\0" +#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") + gettext_noop ("Premature end of regular expression") /* REG_EEND */ + "\0" +#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") + gettext_noop ("Regular expression too big") /* REG_ESIZE */ + "\0" +#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") + gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ + }; + +const size_t __re_error_msgid_idx[] attribute_hidden = + { + REG_NOERROR_IDX, + REG_NOMATCH_IDX, + REG_BADPAT_IDX, + REG_ECOLLATE_IDX, + REG_ECTYPE_IDX, + REG_EESCAPE_IDX, + REG_ESUBREG_IDX, + REG_EBRACK_IDX, + REG_EPAREN_IDX, + REG_EBRACE_IDX, + REG_BADBR_IDX, + REG_ERANGE_IDX, + REG_ESPACE_IDX, + REG_BADRPT_IDX, + REG_EEND_IDX, + REG_ESIZE_IDX, + REG_ERPAREN_IDX + }; + +/* Entry points for GNU code. */ + + +#ifdef ZOS_USS + +/* For ZOS USS we must define btowc */ + +wchar_t +btowc (int c) +{ + wchar_t wtmp[2]; + char tmp[2]; + + tmp[0] = c; + tmp[1] = 0; + + mbtowc (wtmp, tmp, 1); + return wtmp[0]; +} +#endif + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length LENGTH) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. 
*/ + +const char * +re_compile_pattern (const char *pattern, + size_t length, + struct re_pattern_buffer *bufp) +{ + reg_errcode_t ret; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub, unless RE_NO_SUB is set. */ + bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = re_compile_internal (bufp, pattern, length, re_syntax_options); + + if (!ret) + return NULL; + return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. 
*/ + +reg_syntax_t +re_set_syntax (reg_syntax_t syntax) +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} +#ifdef _LIBC +weak_alias (__re_set_syntax, re_set_syntax) +#endif + +int +re_compile_fastmap (struct re_pattern_buffer *bufp) +{ + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + char *fastmap = bufp->fastmap; + + memset (fastmap, '\0', sizeof (char) * SBC_MAX); + re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); + if (dfa->init_state != dfa->init_state_word) + re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); + if (dfa->init_state != dfa->init_state_nl) + re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); + if (dfa->init_state != dfa->init_state_begbuf) + re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); + bufp->fastmap_accurate = 1; + return 0; +} +#ifdef _LIBC +weak_alias (__re_compile_fastmap, re_compile_fastmap) +#endif + +static inline void +__attribute ((always_inline)) +re_set_fastmap (char *fastmap, int icase, int ch) +{ + fastmap[ch] = 1; + if (icase) + fastmap[tolower (ch)] = 1; +} + +/* Helper function for re_compile_fastmap. + Compile fastmap for the initial_state INIT_STATE. 
*/ + +static void +re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, + char *fastmap) +{ + volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + int node_cnt; + int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); + for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) + { + int node = init_state->nodes.elems[node_cnt]; + re_token_type_t type = dfa->nodes[node].type; + + if (type == CHARACTER) + { + re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); +#ifdef RE_ENABLE_I18N + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p; + wchar_t wc; + mbstate_t state; + + p = buf; + *p++ = dfa->nodes[node].opr.c; + while (++node < dfa->nodes_len + && dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].mb_partial) + *p++ = dfa->nodes[node].opr.c; + memset (&state, '\0', sizeof (state)); + if (__mbrtowc (&wc, (const char *) buf, p - buf, + &state) == p - buf + && (__wcrtomb ((char *) buf, towlower (wc), &state) + != (size_t) -1)) + re_set_fastmap (fastmap, 0, buf[0]); + re_free (buf); + } +#endif + } + else if (type == SIMPLE_BRACKET) + { + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } + } +#ifdef RE_ENABLE_I18N + else if (type == COMPLEX_BRACKET) + { + re_charset_t *cset = dfa->nodes[node].opr.mbcset; + int i; + +# ifdef _LIBC + /* See if we have to try all bytes which start multiple collation + elements. + e.g. In da_DK, we want to catch 'a' since "aa" is a valid + collation element, and don't catch 'b' since 'b' is + the only collation element which starts from 'b' (and + it is caught by SIMPLE_BRACKET). 
*/ + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 + && (cset->ncoll_syms || cset->nranges)) + { + const int32_t *table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); + } +# endif /* _LIBC */ + + /* See if we have to start the match at all multibyte characters, + i.e. where we would not find an invalid sequence. This only + applies to multibyte character sets; for single byte character + sets, the SIMPLE_BRACKET again suffices. */ + if (dfa->mb_cur_max > 1 + && (cset->nchar_classes || cset->non_match || cset->nranges +# ifdef _LIBC + || cset->nequiv_classes +# endif /* _LIBC */ + )) + { + unsigned char c = 0; + do + { + mbstate_t mbs; + memset (&mbs, 0, sizeof (mbs)); + if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) + re_set_fastmap (fastmap, false, (int) c); + } + while (++c != 0); + } + + else + { + /* ... Else catch all bytes which can start the mbchars. */ + for (i = 0; i < cset->nmbchars; ++i) + { + char buf[256]; + mbstate_t state; + memset (&state, '\0', sizeof (state)); + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) + { + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) + != (size_t) -1) + re_set_fastmap (fastmap, false, *(unsigned char *) buf); + } + } + } + } +#endif /* RE_ENABLE_I18N */ + else if (type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == OP_UTF8_PERIOD +#endif /* RE_ENABLE_I18N */ + || type == END_OF_RE) + { + memset (fastmap, '\1', sizeof (char) * SBC_MAX); + if (type == END_OF_RE) + bufp->can_be_null = 1; + return; + } + } +} + +/* Entry point for POSIX code. */ +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't. 
Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we considers upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) */ + +int +regcomp (regex_t *__restrict preg, + const char *__restrict pattern, + int cflags) +{ + reg_errcode_t ret; + reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + preg->buffer = NULL; + preg->allocated = 0; + preg->used = 0; + + /* Try to allocate space for the fastmap. */ + preg->fastmap = re_malloc (char, SBC_MAX); + if (BE (preg->fastmap == NULL, 0)) + return REG_ESPACE; + + syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. 
*/ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + preg->no_sub = !!(cflags & REG_NOSUB); + preg->translate = NULL; + + ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) + ret = REG_EPAREN; + + /* We have already checked preg->fastmap != NULL. */ + if (BE (ret == REG_NOERROR, 1)) + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. This function never fails in this implementation. */ + (void) re_compile_fastmap (preg); + else + { + /* Some error occurred while compiling the expression. */ + re_free (preg->fastmap); + preg->fastmap = NULL; + } + + return (int) ret; +} +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror(int errcode, const regex_t *__restrict preg, + char *__restrict errbuf, size_t errbuf_size) +{ + const char *msg; + size_t msg_size; + + if (BE (errcode < 0 + || errcode >= (int) (sizeof (__re_error_msgid_idx) + / sizeof (__re_error_msgid_idx[0])), 0)) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); + + msg_size = strlen (msg) + 1; /* Includes the null. 
*/ + + if (BE (errbuf_size != 0, 1)) + { + if (BE (msg_size > errbuf_size, 0)) + { + memcpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; + } + else + memcpy (errbuf, msg, msg_size); + } + + return msg_size; +} +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif + + +#ifdef RE_ENABLE_I18N +/* This static array is used for the map to single-byte characters when + UTF-8 is used. Otherwise we would allocate memory just to initialize + it the same all the time. UTF-8 is the preferred encoding so this is + a worthwhile optimization. */ +#if __GNUC__ >= 3 +static const bitset_t utf8_sb_map = { + /* Set the first 128 bits. */ + [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX +}; +#else /* ! (__GNUC__ >= 3) */ +static bitset_t utf8_sb_map; +#endif /* __GNUC__ >= 3 */ +#endif /* RE_ENABLE_I18N */ + + +static void +free_dfa_content (re_dfa_t *dfa) +{ + int i, j; + + if (dfa->nodes) + for (i = 0; i < dfa->nodes_len; ++i) + free_token (dfa->nodes + i); + re_free (dfa->nexts); + for (i = 0; i < dfa->nodes_len; ++i) + { + if (dfa->eclosures != NULL) + re_node_set_free (dfa->eclosures + i); + if (dfa->inveclosures != NULL) + re_node_set_free (dfa->inveclosures + i); + if (dfa->edests != NULL) + re_node_set_free (dfa->edests + i); + } + re_free (dfa->edests); + re_free (dfa->eclosures); + re_free (dfa->inveclosures); + re_free (dfa->nodes); + + if (dfa->state_table) + for (i = 0; i <= dfa->state_hash_mask; ++i) + { + struct re_state_table_entry *entry = dfa->state_table + i; + for (j = 0; j < entry->num; ++j) + { + re_dfastate_t *state = entry->array[j]; + free_state (state); + } + re_free (entry->array); + } + re_free (dfa->state_table); +#ifdef RE_ENABLE_I18N + if (dfa->sb_char != utf8_sb_map) + re_free (dfa->sb_char); +#endif + re_free (dfa->subexp_map); +#ifdef DEBUG + re_free (dfa->re_str); +#endif + + re_free (dfa); +} + + +/* Free dynamically allocated space used by PREG. 
*/ + +void +regfree (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + if (BE (dfa != NULL, 1)) + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + + re_free (preg->fastmap); + preg->fastmap = NULL; + + re_free (preg->translate); + preg->translate = NULL; +} +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC + +/* BSD has one and only one pattern buffer. */ +static struct re_pattern_buffer re_comp_buf; + +char * +# ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec above without link errors. */ +weak_function +# endif +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + char *fastmap; + + if (!s) + { + if (!re_comp_buf.buffer) + return gettext ("No previous regular expression"); + return 0; + } + + if (re_comp_buf.buffer) + { + fastmap = re_comp_buf.fastmap; + re_comp_buf.fastmap = NULL; + __regfree (&re_comp_buf); + memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); + re_comp_buf.fastmap = fastmap; + } + + if (re_comp_buf.fastmap == NULL) + { + re_comp_buf.fastmap = (char *) malloc (SBC_MAX); + if (re_comp_buf.fastmap == NULL) + return (char *) gettext (__re_error_msgid + + __re_error_msgid_idx[(int) REG_ESPACE]); + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. 
*/ + return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); +} + +#ifdef _LIBC +libc_freeres_fn (free_mem) +{ + __regfree (&re_comp_buf); +} +#endif + +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. + Compile the regular expression PATTERN, whose length is LENGTH. + SYNTAX indicate regular expression's syntax. */ + +static reg_errcode_t +re_compile_internal (regex_t *preg, const char * pattern, size_t length, + reg_syntax_t syntax) +{ + reg_errcode_t err = REG_NOERROR; + re_dfa_t *dfa; + re_string_t regexp; + + /* Initialize the pattern buffer. */ + preg->fastmap_accurate = 0; + preg->syntax = syntax; + preg->not_bol = preg->not_eol = 0; + preg->used = 0; + preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; + + /* Initialize the dfa. */ + dfa = (re_dfa_t *) preg->buffer; + if (BE (preg->allocated < sizeof (re_dfa_t), 0)) + { + /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. If ->buffer is NULL this + is a simple allocation. */ + dfa = re_realloc (preg->buffer, re_dfa_t, 1); + if (dfa == NULL) + return REG_ESPACE; + preg->allocated = sizeof (re_dfa_t); + preg->buffer = (unsigned char *) dfa; + } + preg->used = sizeof (re_dfa_t); + + err = init_dfa (dfa, length); + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } +#ifdef DEBUG + /* Note: length+1 will not overflow since it is checked in init_dfa. 
*/ + dfa->re_str = re_malloc (char, length + 1); + strncpy (dfa->re_str, pattern, length + 1); +#endif + + __libc_lock_init (dfa->lock); + + err = re_string_construct (®exp, pattern, length, preg->translate, + syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + { + re_compile_internal_free_return: + free_workarea_compile (preg); + re_string_destruct (®exp); + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + return err; + } + + /* Parse the regular expression, and build a structure tree. */ + preg->re_nsub = 0; + dfa->str_tree = parse (®exp, preg, syntax, &err); + if (BE (dfa->str_tree == NULL, 0)) + goto re_compile_internal_free_return; + + /* Analyze the tree and create the nfa. */ + err = analyze (preg); + if (BE (err != REG_NOERROR, 0)) + goto re_compile_internal_free_return; + +#ifdef RE_ENABLE_I18N + /* If possible, do searching in single byte encoding to speed things up. */ + if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) + optimize_utf8 (dfa); +#endif + + /* Then create the initial state of the dfa. */ + err = create_initial_state (dfa); + + /* Release work areas. */ + free_workarea_compile (preg); + re_string_destruct (®exp); + + if (BE (err != REG_NOERROR, 0)) + { + free_dfa_content (dfa); + preg->buffer = NULL; + preg->allocated = 0; + } + + return err; +} + +/* Initialize DFA. We use the length of the regular expression PAT_LEN + as the initial length of some arrays. */ + +static reg_errcode_t +init_dfa (re_dfa_t *dfa, size_t pat_len) +{ + unsigned int table_size; +#ifndef _LIBC + char *codeset_name; +#endif + + memset (dfa, '\0', sizeof (re_dfa_t)); + + /* Force allocation of str_tree_storage the first time. */ + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + + /* Avoid overflows. 
*/ + if (pat_len == SIZE_MAX) + return REG_ESPACE; + + dfa->nodes_alloc = pat_len + 1; + dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); + + /* table_size = 2 ^ ceil(log pat_len) */ + for (table_size = 1; ; table_size <<= 1) + if (table_size > pat_len) + break; + + dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); + dfa->state_hash_mask = table_size - 1; + + dfa->mb_cur_max = MB_CUR_MAX; +#ifdef _LIBC + if (dfa->mb_cur_max == 6 + && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) + dfa->is_utf8 = 1; + dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) + != 0); +#else +# ifdef HAVE_LANGINFO_CODESET + codeset_name = nl_langinfo (CODESET); +# else + codeset_name = getenv ("LC_ALL"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LC_CTYPE"); + if (codeset_name == NULL || codeset_name[0] == '\0') + codeset_name = getenv ("LANG"); + if (codeset_name == NULL) + codeset_name = ""; + else if (strchr (codeset_name, '.') != NULL) + codeset_name = strchr (codeset_name, '.') + 1; +# endif + + /* strcasecmp isn't a standard interface. brute force check */ +#if 0 + if (strcasecmp (codeset_name, "UTF-8") == 0 + || strcasecmp (codeset_name, "UTF8") == 0) + dfa->is_utf8 = 1; +#else + if ( (codeset_name[0] == 'U' || codeset_name[0] == 'u') + && (codeset_name[1] == 'T' || codeset_name[1] == 't') + && (codeset_name[2] == 'F' || codeset_name[2] == 'f') + && (codeset_name[3] == '-' + ? codeset_name[4] == '8' && codeset_name[5] == '\0' + : codeset_name[3] == '8' && codeset_name[4] == '\0')) + dfa->is_utf8 = 1; +#endif + + /* We check exhaustively in the loop below if this charset is a + superset of ASCII. */ + dfa->map_notascii = 0; +#endif + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + if (dfa->is_utf8) + { +#if !defined(__GNUC__) || __GNUC__ < 3 + static short utf8_sb_map_inited = 0; + + if (! 
utf8_sb_map_inited) + { + int i; + + utf8_sb_map_inited = 0; + for (i = 0; i <= 0x80 / BITSET_WORD_BITS - 1; i++) + utf8_sb_map[i] = BITSET_WORD_MAX; + } +#endif + dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; + } + else + { + int i, j, ch; + + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); + if (BE (dfa->sb_char == NULL, 0)) + return REG_ESPACE; + + /* Set the bits corresponding to single byte chars. */ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + { + wint_t wch = __btowc (ch); + if (wch != WEOF) + dfa->sb_char[i] |= (bitset_word_t) 1 << j; +# ifndef _LIBC + if (isascii (ch) && wch != ch) + dfa->map_notascii = 1; +# endif + } + } + } +#endif + + if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +/* Initialize WORD_CHAR table, which indicate which character is + "word". In this case "word" means that it is the word construction + character used by some operators like "\<", "\>", etc. */ + +static void +internal_function +init_word_char (re_dfa_t *dfa) +{ + int i, j, ch; + dfa->word_ops_used = 1; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (isalnum (ch) || ch == '_') + dfa->word_char[i] |= (bitset_word_t) 1 << j; +} + +/* Free the work area which are only used while compiling. */ + +static void +free_workarea_compile (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_storage_t *storage, *next; + for (storage = dfa->str_tree_storage; storage; storage = next) + { + next = storage->next; + re_free (storage); + } + dfa->str_tree_storage = NULL; + dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; + dfa->str_tree = NULL; + re_free (dfa->org_indices); + dfa->org_indices = NULL; +} + +/* Create initial states for all contexts. 
*/ + +static reg_errcode_t +create_initial_state (re_dfa_t *dfa) +{ + int first, i; + reg_errcode_t err; + re_node_set init_nodes; + + /* Initial states have the epsilon closure of the node which is + the first node of the regular expression. */ + first = dfa->str_tree->first->node_idx; + dfa->init_node = first; + err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* The back-references which are in initial states can epsilon transit, + since in this case all of the subexpressions can be null. + Then we add epsilon closures of the nodes which are the next nodes of + the back-references. */ + if (dfa->nbackref > 0) + for (i = 0; i < init_nodes.nelem; ++i) + { + int node_idx = init_nodes.elems[i]; + re_token_type_t type = dfa->nodes[node_idx].type; + + int clexp_idx; + if (type != OP_BACK_REF) + continue; + for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) + { + re_token_t *clexp_node; + clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; + if (clexp_node->type == OP_CLOSE_SUBEXP + && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) + break; + } + if (clexp_idx == init_nodes.nelem) + continue; + + if (type == OP_BACK_REF) + { + int dest_idx = dfa->edests[node_idx].elems[0]; + if (!re_node_set_contains (&init_nodes, dest_idx)) + { + reg_errcode_t err = re_node_set_merge (&init_nodes, + dfa->eclosures + + dest_idx); + if (err != REG_NOERROR) + return err; + i = 0; + } + } + } + + /* It must be the first time to invoke acquire_state. */ + dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); + /* We don't check ERR here, since the initial state must not be NULL. 
*/ + if (BE (dfa->init_state == NULL, 0)) + return err; + if (dfa->init_state->has_constraint) + { + dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_WORD); + dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, + CONTEXT_NEWLINE); + dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, + &init_nodes, + CONTEXT_NEWLINE + | CONTEXT_BEGBUF); + if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return err; + } + else + dfa->init_state_word = dfa->init_state_nl + = dfa->init_state_begbuf = dfa->init_state; + + re_node_set_free (&init_nodes); + return REG_NOERROR; +} + +#ifdef RE_ENABLE_I18N +/* If it is possible to do searching in single byte encoding instead of UTF-8 + to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change + DFA nodes where needed. */ + +static void +optimize_utf8 (re_dfa_t *dfa) +{ + int node, i, mb_chars = 0, has_period = 0; + + for (node = 0; node < dfa->nodes_len; ++node) + switch (dfa->nodes[node].type) + { + case CHARACTER: + if (dfa->nodes[node].opr.c >= 0x80) + mb_chars = 1; + break; + case ANCHOR: + switch (dfa->nodes[node].opr.ctx_type) + { + case LINE_FIRST: + case LINE_LAST: + case BUF_FIRST: + case BUF_LAST: + break; + default: + /* Word anchors etc. cannot be handled. It's okay to test + opr.ctx_type since constraints (for all DFA nodes) are + created by ORing one or more opr.ctx_type values. */ + return; + } + break; + case OP_PERIOD: + has_period = 1; + break; + case OP_BACK_REF: + case OP_ALT: + case END_OF_RE: + case OP_DUP_ASTERISK: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + break; + case COMPLEX_BRACKET: + return; + case SIMPLE_BRACKET: + /* Just double check. The non-ASCII range starts at 0x80. 
*/ + assert (0x80 % BITSET_WORD_BITS == 0); + for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) + if (dfa->nodes[node].opr.sbcset[i]) + return; + break; + default: + abort (); + } + + if (mb_chars || has_period) + for (node = 0; node < dfa->nodes_len; ++node) + { + if (dfa->nodes[node].type == CHARACTER + && dfa->nodes[node].opr.c >= 0x80) + dfa->nodes[node].mb_partial = 0; + else if (dfa->nodes[node].type == OP_PERIOD) + dfa->nodes[node].type = OP_UTF8_PERIOD; + } + + /* The search can be in single byte locale. */ + dfa->mb_cur_max = 1; + dfa->is_utf8 = 0; + dfa->has_mb_node = dfa->nbackref > 0 || has_period; +} +#endif + +/* Analyze the structure tree, and calculate "first", "next", "edest", + "eclosure", and "inveclosure". */ + +static reg_errcode_t +analyze (regex_t *preg) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + reg_errcode_t ret; + + /* Allocate arrays. */ + dfa->nexts = re_malloc (int, dfa->nodes_alloc); + dfa->org_indices = re_malloc (int, dfa->nodes_alloc); + dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); + dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); + if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL + || dfa->eclosures == NULL, 0)) + return REG_ESPACE; + + dfa->subexp_map = re_malloc (int, preg->re_nsub); + if (dfa->subexp_map != NULL) + { + int i; + for (i = 0; i < preg->re_nsub; i++) + dfa->subexp_map[i] = i; + preorder (dfa->str_tree, optimize_subexps, dfa); + for (i = 0; i < preg->re_nsub; i++) + if (dfa->subexp_map[i] != i) + break; + if (i == preg->re_nsub) + { + free (dfa->subexp_map); + dfa->subexp_map = NULL; + } + } + + ret = postorder (dfa->str_tree, lower_subexps, preg); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = postorder (dfa->str_tree, calc_first, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + preorder (dfa->str_tree, calc_next, dfa); + ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + ret = 
calc_eclosure (dfa); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + /* We only need this during the prune_impossible_nodes pass in regexec.c; + skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ + if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) + || dfa->nbackref) + { + dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); + if (BE (dfa->inveclosures == NULL, 0)) + return REG_ESPACE; + ret = calc_inveclosure (dfa); + } + + return ret; +} + +/* Our parse trees are very unbalanced, so we cannot use a stack to + implement parse tree visits. Instead, we use parent pointers and + some hairy code in these two functions. */ +static reg_errcode_t +postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node, *prev; + + for (node = root; ; ) + { + /* Descend down the tree, preferably to the left (or to the right + if that's the only child). */ + while (node->left || node->right) + if (node->left) + node = node->left; + else + node = node->right; + + do + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + if (node->parent == NULL) + return REG_NOERROR; + prev = node; + node = node->parent; + } + /* Go up while we have a node that is reached from the right. */ + while (node->right == prev || node->right == NULL); + node = node->right; + } +} + +static reg_errcode_t +preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) +{ + bin_tree_t *node; + + for (node = root; ; ) + { + reg_errcode_t err = fn (extra, node); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Go to the left node, or up and to the right. 
*/ + if (node->left) + node = node->left; + else + { + bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + if (!node) + return REG_NOERROR; + } + node = node->right; + } + } +} + +/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell + re_search_internal to map the inner one's opr.idx to this one's. Adjust + backreferences as well. Requires a preorder visit. */ +static reg_errcode_t +optimize_subexps (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + + if (node->token.type == OP_BACK_REF && dfa->subexp_map) + { + int idx = node->token.opr.idx; + node->token.opr.idx = dfa->subexp_map[idx]; + dfa->used_bkref_map |= 1 << node->token.opr.idx; + } + + else if (node->token.type == SUBEXP + && node->left && node->left->token.type == SUBEXP) + { + int other_idx = node->left->token.opr.idx; + + node->left = node->left->left; + if (node->left) + node->left->parent = node; + + dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); + } + + return REG_NOERROR; +} + +/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation + of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. 
*/ +static reg_errcode_t +lower_subexps (void *extra, bin_tree_t *node) +{ + regex_t *preg = (regex_t *) extra; + reg_errcode_t err = REG_NOERROR; + + if (node->left && node->left->token.type == SUBEXP) + { + node->left = lower_subexp (&err, preg, node->left); + if (node->left) + node->left->parent = node; + } + if (node->right && node->right->token.type == SUBEXP) + { + node->right = lower_subexp (&err, preg, node->right); + if (node->right) + node->right->parent = node; + } + + return err; +} + +static bin_tree_t * +lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *body = node->left; + bin_tree_t *op, *cls, *tree1, *tree; + + if (preg->no_sub + /* We do not optimize empty subexpressions, because otherwise we may + have bad CONCAT nodes with NULL children. This is obviously not + very common, so we do not lose much. An example that triggers + this case is the sed "script" /\(\)/x. */ + && node->left != NULL + && (node->token.opr.idx >= BITSET_WORD_BITS + || !(dfa->used_bkref_map + & ((bitset_word_t) 1 << node->token.opr.idx)))) + return node->left; + + /* Convert the SUBEXP node to the concatenation of an + OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ + op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); + cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); + tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; + tree = create_tree (dfa, op, tree1, CONCAT); + if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + + op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; + op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; + return tree; +} + +/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton + nodes. Requires a postorder visit. 
*/ +static reg_errcode_t +calc_first (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + if (node->token.type == CONCAT) + { + node->first = node->left->first; + node->node_idx = node->left->node_idx; + } + else + { + node->first = node; + node->node_idx = re_dfa_add_node (dfa, node->token); + if (BE (node->node_idx == -1, 0)) + return REG_ESPACE; + if (node->token.type == ANCHOR) + dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; + } + return REG_NOERROR; +} + +/* Pass 2: compute NEXT on the tree. Preorder visit. */ +static reg_errcode_t +calc_next (void *extra, bin_tree_t *node) +{ + switch (node->token.type) + { + case OP_DUP_ASTERISK: + node->left->next = node; + break; + case CONCAT: + node->left->next = node->right->first; + node->right->next = node->next; + break; + default: + if (node->left) + node->left->next = node->next; + if (node->right) + node->right->next = node->next; + break; + } + return REG_NOERROR; +} + +/* Pass 3: link all DFA nodes to their NEXT node (any order will do). 
*/ +static reg_errcode_t +link_nfa_nodes (void *extra, bin_tree_t *node) +{ + re_dfa_t *dfa = (re_dfa_t *) extra; + int idx = node->node_idx; + reg_errcode_t err = REG_NOERROR; + + switch (node->token.type) + { + case CONCAT: + break; + + case END_OF_RE: + assert (node->next == NULL); + break; + + case OP_DUP_ASTERISK: + case OP_ALT: + { + int left, right; + dfa->has_plural_match = 1; + if (node->left != NULL) + left = node->left->first->node_idx; + else + left = node->next->node_idx; + if (node->right != NULL) + right = node->right->first->node_idx; + else + right = node->next->node_idx; + assert (left > -1); + assert (right > -1); + err = re_node_set_init_2 (dfa->edests + idx, left, right); + } + break; + + case ANCHOR: + case OP_OPEN_SUBEXP: + case OP_CLOSE_SUBEXP: + err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); + break; + + case OP_BACK_REF: + dfa->nexts[idx] = node->next->node_idx; + if (node->token.type == OP_BACK_REF) + err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); + break; + + default: + assert (!IS_EPSILON_NODE (node->token.type)); + dfa->nexts[idx] = node->next->node_idx; + break; + } + + return err; +} + +/* Duplicate the epsilon closure of the node ROOT_NODE. + Note that duplicated nodes have constraint INIT_CONSTRAINT in addition + to their own constraint. */ + +static reg_errcode_t +internal_function +duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, + int root_node, unsigned int init_constraint) +{ + int org_node, clone_node, ret; + unsigned int constraint = init_constraint; + for (org_node = top_org_node, clone_node = top_clone_node;;) + { + int org_dest, clone_dest; + if (dfa->nodes[org_node].type == OP_BACK_REF) + { + /* If the back reference epsilon-transit, its destination must + also have the constraint. Then duplicate the epsilon closure + of the destination of the back reference, and store it in + edests of the back reference. 
*/ + org_dest = dfa->nexts[org_node]; + re_node_set_empty (dfa->edests + clone_node); + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + dfa->nexts[clone_node] = dfa->nexts[org_node]; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else if (dfa->edests[org_node].nelem == 0) + { + /* In case of the node can't epsilon-transit, don't duplicate the + destination and store the original destination as the + destination of the node. */ + dfa->nexts[clone_node] = dfa->nexts[org_node]; + break; + } + else if (dfa->edests[org_node].nelem == 1) + { + /* In case of the node can epsilon-transit, and it has only one + destination. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* If the node is root_node itself, it means the epsilon clsoure + has a loop. Then tie it to the destination of the root_node. */ + if (org_node == root_node && clone_node != org_node) + { + ret = re_node_set_insert (dfa->edests + clone_node, org_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + break; + } + /* In case of the node has another constraint, add it. */ + constraint |= dfa->nodes[org_node].constraint; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + else /* dfa->edests[org_node].nelem == 2 */ + { + /* In case of the node can epsilon-transit, and it has two + destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ + org_dest = dfa->edests[org_node].elems[0]; + re_node_set_empty (dfa->edests + clone_node); + /* Search for a duplicated node which satisfies the constraint. */ + clone_dest = search_duplicated_node (dfa, org_dest, constraint); + if (clone_dest == -1) + { + /* There is no such duplicated node, create a new one. 
*/ + reg_errcode_t err; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + err = duplicate_node_closure (dfa, org_dest, clone_dest, + root_node, constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + { + /* There is a duplicated node which satisfies the constraint, + use it to avoid infinite loop. */ + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + + org_dest = dfa->edests[org_node].elems[1]; + clone_dest = duplicate_node (dfa, org_dest, constraint); + if (BE (clone_dest == -1, 0)) + return REG_ESPACE; + ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); + if (BE (ret < 0, 0)) + return REG_ESPACE; + } + org_node = org_dest; + clone_node = clone_dest; + } + return REG_NOERROR; +} + +/* Search for a node which is duplicated from the node ORG_NODE, and + satisfies the constraint CONSTRAINT. */ + +static int +search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint) +{ + int idx; + for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) + { + if (org_node == dfa->org_indices[idx] + && constraint == dfa->nodes[idx].constraint) + return idx; /* Found. */ + } + return -1; /* Not found. */ +} + +/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. + Return the index of the new node, or -1 if insufficient storage is + available. */ + +static int +duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) +{ + int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); + if (BE (dup_idx != -1, 1)) + { + dfa->nodes[dup_idx].constraint = constraint; + dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint; + dfa->nodes[dup_idx].duplicated = 1; + + /* Store the index of the original node. 
*/ + dfa->org_indices[dup_idx] = org_idx; + } + return dup_idx; +} + +static reg_errcode_t +calc_inveclosure (re_dfa_t *dfa) +{ + int src, idx, ret; + for (idx = 0; idx < dfa->nodes_len; ++idx) + re_node_set_init_empty (dfa->inveclosures + idx); + + for (src = 0; src < dfa->nodes_len; ++src) + { + int *elems = dfa->eclosures[src].elems; + for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) + { + ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + } + + return REG_NOERROR; +} + +/* Calculate "eclosure" for all the node in DFA. */ + +static reg_errcode_t +calc_eclosure (re_dfa_t *dfa) +{ + int node_idx, incomplete; +#ifdef DEBUG + assert (dfa->nodes_len > 0); +#endif + incomplete = 0; + /* For each nodes, calculate epsilon closure. */ + for (node_idx = 0; ; ++node_idx) + { + reg_errcode_t err; + re_node_set eclosure_elem; + if (node_idx == dfa->nodes_len) + { + if (!incomplete) + break; + incomplete = 0; + node_idx = 0; + } + +#ifdef DEBUG + assert (dfa->eclosures[node_idx].nelem != -1); +#endif + + /* If we have already calculated, skip it. */ + if (dfa->eclosures[node_idx].nelem != 0) + continue; + /* Calculate epsilon closure of `node_idx'. */ + err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (dfa->eclosures[node_idx].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + return REG_NOERROR; +} + +/* Calculate epsilon closure of NODE. */ + +static reg_errcode_t +calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) +{ + reg_errcode_t err; + int i; + re_node_set eclosure; + int ret; + int incomplete = 0; + err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* This indicates that we are calculating this node now. + We reference this value to avoid infinite loop. 
*/ + dfa->eclosures[node].nelem = -1; + + /* If the current node has constraints, duplicate all nodes + since they must inherit the constraints. */ + if (dfa->nodes[node].constraint + && dfa->edests[node].nelem + && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) + { + err = duplicate_node_closure (dfa, node, node, node, + dfa->nodes[node].constraint); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Expand each epsilon destination nodes. */ + if (IS_EPSILON_NODE(dfa->nodes[node].type)) + for (i = 0; i < dfa->edests[node].nelem; ++i) + { + re_node_set eclosure_elem; + int edest = dfa->edests[node].elems[i]; + /* If calculating the epsilon closure of `edest' is in progress, + return intermediate result. */ + if (dfa->eclosures[edest].nelem == -1) + { + incomplete = 1; + continue; + } + /* If we haven't calculated the epsilon closure of `edest' yet, + calculate now. Otherwise use calculated epsilon closure. */ + if (dfa->eclosures[edest].nelem == 0) + { + err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + eclosure_elem = dfa->eclosures[edest]; + /* Merge the epsilon closure of `edest'. */ + err = re_node_set_merge (&eclosure, &eclosure_elem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* If the epsilon closure of `edest' is incomplete, + the epsilon closure of this node is also incomplete. */ + if (dfa->eclosures[edest].nelem == 0) + { + incomplete = 1; + re_node_set_free (&eclosure_elem); + } + } + + /* An epsilon closure includes itself. */ + ret = re_node_set_insert (&eclosure, node); + if (BE (ret < 0, 0)) + return REG_ESPACE; + if (incomplete && !root) + dfa->eclosures[node].nelem = 0; + else + dfa->eclosures[node] = eclosure; + *new_set = eclosure; + return REG_NOERROR; +} + +/* Functions for token which are used in the parser. */ + +/* Fetch a token from INPUT. + We must not use this function inside bracket expressions. 
*/ + +static void +internal_function +fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) +{ + re_string_skip_bytes (input, peek_token (result, input, syntax)); +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function inside bracket expressions. */ + +static int +internal_function +peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + + c = re_string_peek_byte (input, 0); + token->opr.c = c; + + token->word_char = 0; +#ifdef RE_ENABLE_I18N + token->mb_partial = 0; + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + token->mb_partial = 1; + return 1; + } +#endif + if (c == '\\') + { + unsigned char c2; + if (re_string_cur_idx (input) + 1 >= re_string_length (input)) + { + token->type = BACK_SLASH; + return 1; + } + + c2 = re_string_peek_byte_case (input, 1); + token->opr.c = c2; + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, + re_string_cur_idx (input) + 1); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (c2) != 0; + + switch (c2) + { + case '|': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (!(syntax & RE_NO_BK_REFS)) + { + token->type = OP_BACK_REF; + token->opr.idx = c2 - '1'; + } + break; + case '<': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_FIRST; + } + break; + case '>': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = WORD_LAST; + } + break; + case 'b': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = 
WORD_DELIM; + } + break; + case 'B': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = NOT_WORD_DELIM; + } + break; + case 'w': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_WORD; + break; + case 'W': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTWORD; + break; + case 's': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_SPACE; + break; + case 'S': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTSPACE; + break; + case '`': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_FIRST; + } + break; + case '\'': + if (!(syntax & RE_NO_GNU_OPS)) + { + token->type = ANCHOR; + token->opr.ctx_type = BUF_LAST; + } + break; + case '(': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (!(syntax & RE_NO_BK_PARENS)) + token->type = OP_CLOSE_SUBEXP; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) + token->type = OP_CLOSE_DUP_NUM; + break; + default: + break; + } + return 2; + } + + token->type = CHARACTER; +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); + token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; + } + else +#endif + token->word_char = IS_WORD_CHAR (token->opr.c); + + switch (c) + { + case '\n': + if (syntax & RE_NEWLINE_ALT) + token->type = OP_ALT; + break; + case '|': + if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) + token->type = OP_ALT; + break; + case '*': + token->type = OP_DUP_ASTERISK; + break; + case '+': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & 
RE_BK_PLUS_QM)) + token->type = OP_DUP_PLUS; + break; + case '?': + if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) + token->type = OP_DUP_QUESTION; + break; + case '{': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_OPEN_DUP_NUM; + break; + case '}': + if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + token->type = OP_CLOSE_DUP_NUM; + break; + case '(': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_OPEN_SUBEXP; + break; + case ')': + if (syntax & RE_NO_BK_PARENS) + token->type = OP_CLOSE_SUBEXP; + break; + case '[': + token->type = OP_OPEN_BRACKET; + break; + case '.': + token->type = OP_PERIOD; + break; + case '^': + if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && + re_string_cur_idx (input) != 0) + { + char prev = re_string_peek_byte (input, -1); + if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_FIRST; + break; + case '$': + if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && + re_string_cur_idx (input) + 1 != re_string_length (input)) + { + re_token_t next; + re_string_skip_bytes (input, 1); + peek_token (&next, input, syntax); + re_string_skip_bytes (input, -1); + if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) + break; + } + token->type = ANCHOR; + token->opr.ctx_type = LINE_LAST; + break; + default: + break; + } + return 1; +} + +/* Peek a token from INPUT, and return the length of the token. + We must not use this function out of bracket expressions. 
*/ + +static int +internal_function +peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) +{ + unsigned char c; + if (re_string_eoi (input)) + { + token->type = END_OF_RE; + return 0; + } + c = re_string_peek_byte (input, 0); + token->opr.c = c; + +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1 && + !re_string_first_byte (input, re_string_cur_idx (input))) + { + token->type = CHARACTER; + return 1; + } +#endif /* RE_ENABLE_I18N */ + + if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) + && re_string_cur_idx (input) + 1 < re_string_length (input)) + { + /* In this case, '\' escape a character. */ + unsigned char c2; + re_string_skip_bytes (input, 1); + c2 = re_string_peek_byte (input, 0); + token->opr.c = c2; + token->type = CHARACTER; + return 1; + } + if (c == '[') /* '[' is a special char in a bracket exps. */ + { + unsigned char c2; + int token_len; + if (re_string_cur_idx (input) + 1 < re_string_length (input)) + c2 = re_string_peek_byte (input, 1); + else + c2 = 0; + token->opr.c = c2; + token_len = 2; + switch (c2) + { + case '.': + token->type = OP_OPEN_COLL_ELEM; + break; + case '=': + token->type = OP_OPEN_EQUIV_CLASS; + break; + case ':': + if (syntax & RE_CHAR_CLASSES) + { + token->type = OP_OPEN_CHAR_CLASS; + break; + } + /* else fall through. */ + default: + token->type = CHARACTER; + token->opr.c = c; + token_len = 1; + break; + } + return token_len; + } + switch (c) + { + case '-': + token->type = OP_CHARSET_RANGE; + break; + case ']': + token->type = OP_CLOSE_BRACKET; + break; + case '^': + token->type = OP_NON_MATCH_LIST; + break; + default: + token->type = CHARACTER; + } + return 1; +} + +/* Functions for parser. */ + +/* Entry point of the parser. + Parse the regular expression REGEXP and return the structure tree. + If an error is occured, ERR is set by error code, and return NULL. 
+ This function build the following tree, from regular expression <reg_exp>: + CAT + / \ + / \ + <reg_exp> EOR + + CAT means concatenation. + EOR means end of regular expression. */ + +static bin_tree_t * +parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, + reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *eor, *root; + re_token_t current_token; + dfa->syntax = syntax; + fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); + tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + eor = create_tree (dfa, NULL, NULL, END_OF_RE); + if (tree != NULL) + root = create_tree (dfa, tree, eor, CONCAT); + else + root = eor; + if (BE (eor == NULL || root == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + return root; +} + +/* This function build the following tree, from regular expression + <branch1>|<branch2>: + ALT + / \ + / \ + <branch1> <branch2> + + ALT means alternative, which represents the operator `|'. 
*/ + +static bin_tree_t * +parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree, *branch = NULL; + tree = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type == OP_ALT) + { + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + if (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + branch = parse_branch (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && branch == NULL, 0)) + return NULL; + } + else + branch = NULL; + tree = create_tree (dfa, tree, branch, OP_ALT); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + return tree; +} + +/* This function build the following tree, from regular expression + <exp1><exp2>: + CAT + / \ + / \ + <exp1> <exp2> + + CAT means concatenation. */ + +static bin_tree_t * +parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + bin_tree_t *tree, *exp; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + tree = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + + while (token->type != OP_ALT && token->type != END_OF_RE + && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) + { + exp = parse_expression (regexp, preg, token, syntax, nest, err); + if (BE (*err != REG_NOERROR && exp == NULL, 0)) + { + return NULL; + } + if (tree != NULL && exp != NULL) + { + tree = create_tree (dfa, tree, exp, CONCAT); + if (tree == NULL) + { + *err = REG_ESPACE; + return NULL; + } + } + else if (tree == NULL) + tree = exp; + /* Otherwise exp == NULL, we don't need to create new tree. 
*/ + } + return tree; +} + +/* This function build the following tree, from regular expression a*: + * + | + a +*/ + +static bin_tree_t * +parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + switch (token->type) + { + case CHARACTER: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + while (!re_string_eoi (regexp) + && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) + { + bin_tree_t *mbc_remain; + fetch_token (token, regexp, syntax); + mbc_remain = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree, mbc_remain, CONCAT); + if (BE (mbc_remain == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + } +#endif + break; + case OP_OPEN_SUBEXP: + tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_OPEN_BRACKET: + tree = parse_bracket_exp (regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_BACK_REF: + if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) + { + *err = REG_ESUBREG; + return NULL; + } + dfa->used_bkref_map |= 1 << token->opr.idx; + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + ++dfa->nbackref; + dfa->has_mb_node = 1; + break; + case OP_OPEN_DUP_NUM: + if (syntax & RE_CONTEXT_INVALID_DUP) + { + *err = REG_BADRPT; + return NULL; + } + /* FALLTHROUGH */ + case OP_DUP_ASTERISK: + case OP_DUP_PLUS: + case OP_DUP_QUESTION: + if (syntax & RE_CONTEXT_INVALID_OPS) + { + *err = REG_BADRPT; + return NULL; + } + else if (syntax & RE_CONTEXT_INDEP_OPS) + { + fetch_token (token, regexp, syntax); 
+ return parse_expression (regexp, preg, token, syntax, nest, err); + } + /* else fall through */ + case OP_CLOSE_SUBEXP: + if ((token->type == OP_CLOSE_SUBEXP) && + !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) + { + *err = REG_ERPAREN; + return NULL; + } + /* else fall through */ + case OP_CLOSE_DUP_NUM: + /* We treat it as a normal character. */ + + /* Then we can these characters as normal characters. */ + token->type = CHARACTER; + /* mb_partial and word_char bits should be initialized already + by peek_token. */ + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + break; + case ANCHOR: + if ((token->opr.ctx_type + & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) + && dfa->word_ops_used == 0) + init_word_char (dfa); + if (token->opr.ctx_type == WORD_DELIM + || token->opr.ctx_type == NOT_WORD_DELIM) + { + bin_tree_t *tree_first, *tree_last; + if (token->opr.ctx_type == WORD_DELIM) + { + token->opr.ctx_type = WORD_FIRST; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = WORD_LAST; + } + else + { + token->opr.ctx_type = INSIDE_WORD; + tree_first = create_token_tree (dfa, NULL, NULL, token); + token->opr.ctx_type = INSIDE_NOTWORD; + } + tree_last = create_token_tree (dfa, NULL, NULL, token); + tree = create_tree (dfa, tree_first, tree_last, OP_ALT); + if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + else + { + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + } + /* We must return here, since ANCHORs can't be followed + by repetition operators. + eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>", + it must not be "<ANCHOR(^)><REPEAT(*)>". 
*/ + fetch_token (token, regexp, syntax); + return tree; + case OP_PERIOD: + tree = create_token_tree (dfa, NULL, NULL, token); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + if (dfa->mb_cur_max > 1) + dfa->has_mb_node = 1; + break; + case OP_WORD: + case OP_NOTWORD: + tree = build_charclass_op (dfa, regexp->trans, + "alnum", + "_", + token->type == OP_NOTWORD, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_SPACE: + case OP_NOTSPACE: + tree = build_charclass_op (dfa, regexp->trans, + "space", + "", + token->type == OP_NOTSPACE, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_ALT: + case END_OF_RE: + return NULL; + case BACK_SLASH: + *err = REG_EESCAPE; + return NULL; + default: + /* Must not happen? */ +#ifdef DEBUG + assert (0); +#endif + return NULL; + } + fetch_token (token, regexp, syntax); + + while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS + || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) + { + tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + /* In BRE consecutive duplications are not allowed. */ + if ((syntax & RE_CONTEXT_INVALID_DUP) + && (token->type == OP_DUP_ASTERISK + || token->type == OP_OPEN_DUP_NUM)) + { + *err = REG_BADRPT; + return NULL; + } + } + + return tree; +} + +/* This function build the following tree, from regular expression + (<reg_exp>): + SUBEXP + | + <reg_exp> +*/ + +static bin_tree_t * +parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) +{ + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + bin_tree_t *tree; + size_t cur_nsub; + cur_nsub = preg->re_nsub++; + + fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); + + /* The subexpression may be a null string. 
*/ + if (token->type == OP_CLOSE_SUBEXP) + tree = NULL; + else + { + tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); + if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) + *err = REG_EPAREN; + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + + if (cur_nsub <= '9' - '1') + dfa->completed_bkref_map |= 1 << cur_nsub; + + tree = create_tree (dfa, tree, NULL, SUBEXP); + if (BE (tree == NULL, 0)) + { + *err = REG_ESPACE; + return NULL; + } + tree->token.opr.idx = cur_nsub; + return tree; +} + +/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ + +static bin_tree_t * +parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) +{ + bin_tree_t *tree = NULL, *old_tree = NULL; + int i, start, end, start_idx = re_string_cur_idx (regexp); +#ifndef RE_TOKEN_INIT_BUG + re_token_t start_token = *token; +#else + re_token_t start_token; + + memcpy ((void *) &start_token, (void *) token, sizeof start_token); +#endif + + if (token->type == OP_OPEN_DUP_NUM) + { + end = 0; + start = fetch_number (regexp, token, syntax); + if (start == -1) + { + if (token->type == CHARACTER && token->opr.c == ',') + start = 0; /* We treat "{,m}" as "{0,m}". */ + else + { + *err = REG_BADBR; /* <re>{} is invalid. */ + return NULL; + } + } + if (BE (start != -2, 1)) + { + /* We treat "{n}" as "{n,n}". */ + end = ((token->type == OP_CLOSE_DUP_NUM) ? start + : ((token->type == CHARACTER && token->opr.c == ',') + ? fetch_number (regexp, token, syntax) : -2)); + } + if (BE (start == -2 || end == -2, 0)) + { + /* Invalid sequence. */ + if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) + { + if (token->type == END_OF_RE) + *err = REG_EBRACE; + else + *err = REG_BADBR; + + return NULL; + } + + /* If the syntax bit is set, rollback. 
*/ + re_string_set_index (regexp, start_idx); + *token = start_token; + token->type = CHARACTER; + /* mb_partial and word_char bits should be already initialized by + peek_token. */ + return elem; + } + + if (BE ((end != -1 && start > end) || token->type != OP_CLOSE_DUP_NUM, 0)) + { + /* First number greater than second. */ + *err = REG_BADBR; + return NULL; + } + } + else + { + start = (token->type == OP_DUP_PLUS) ? 1 : 0; + end = (token->type == OP_DUP_QUESTION) ? 1 : -1; + } + + fetch_token (token, regexp, syntax); + + if (BE (elem == NULL, 0)) + return NULL; + if (BE (start == 0 && end == 0, 0)) + { + postorder (elem, free_tree, NULL); + return NULL; + } + + /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ + if (BE (start > 0, 0)) + { + tree = elem; + for (i = 2; i <= start; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (start == end) + return tree; + + /* Duplicate ELEM before it is marked optional. */ + elem = duplicate_tree (elem, dfa); + old_tree = tree; + } + else + old_tree = NULL; + + if (elem->token.type == SUBEXP) + postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); + + tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + + /* This loop is actually executed only when end != -1, + to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have + already created the start+1-th copy. 
*/ + for (i = start + 2; i <= end; ++i) + { + elem = duplicate_tree (elem, dfa); + tree = create_tree (dfa, tree, elem, CONCAT); + if (BE (elem == NULL || tree == NULL, 0)) + goto parse_dup_op_espace; + + tree = create_tree (dfa, tree, NULL, OP_ALT); + if (BE (tree == NULL, 0)) + goto parse_dup_op_espace; + } + + if (old_tree) + tree = create_tree (dfa, old_tree, tree, CONCAT); + + return tree; + + parse_dup_op_espace: + *err = REG_ESPACE; + return NULL; +} + +/* Size of the names for collating symbol/equivalence_class/character_class. + I'm not sure, but maybe enough. */ +#define BRACKET_NAME_BUF_SIZE 32 + +#ifndef _LIBC + /* Local function for parse_bracket_exp only used in case of NOT _LIBC. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. + RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) +# else /* not RE_ENABLE_I18N */ +build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, + bracket_elem_t *end_elem) +# endif /* not RE_ENABLE_I18N */ +{ + unsigned int start_ch, end_ch; + /* Equivalence Classes and Character Classes can't be a range start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + /* We can handle no multi character collating elements without libc + support. 
*/ + if (BE ((start_elem->type == COLL_SYM + && strlen ((char *) start_elem->opr.name) > 1) + || (end_elem->type == COLL_SYM + && strlen ((char *) end_elem->opr.name) > 1), 0)) + return REG_ECOLLATE; + +# ifdef RE_ENABLE_I18N + { + wchar_t wc; + wint_t start_wc; + wint_t end_wc; + wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + + start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); +#ifdef GAWK + /* + * Fedora Core 2, maybe others, have broken `btowc' that returns -1 + * for any value > 127. Sigh. Note that `start_ch' and `end_ch' are + * unsigned, so we don't have sign extension problems. + */ + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? start_ch : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? end_ch : end_elem->opr.wch); +#else + start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) + ? __btowc (start_ch) : start_elem->opr.wch); + end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) + ? __btowc (end_ch) : end_elem->opr.wch); +#endif + if (start_wc == WEOF || end_wc == WEOF) + return REG_ECOLLATE; + cmp_buf[0] = start_wc; + cmp_buf[4] = end_wc; + if (wcscoll (cmp_buf, cmp_buf + 4) > 0) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, for !_LIBC we have no collation elements: if the + character set is single byte, the single byte character set + that we build below suffices. parse_bracket_exp passes + no MBCSET if dfa->mb_cur_max == 1. */ + if (mbcset) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. 
*/ + wchar_t *new_array_start, *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. */ + new_nranges = 2 * mbcset->nranges + 1; + /* Use realloc since mbcset->range_starts and mbcset->range_ends + are NULL if *range_alloc == 0. */ + new_array_start = re_realloc (mbcset->range_starts, wchar_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, wchar_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_wc; + mbcset->range_ends[mbcset->nranges++] = end_wc; + } + + /* Build the table for single byte characters. */ + for (wc = 0; wc < SBC_MAX; ++wc) + { + cmp_buf[2] = wc; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + bitset_set (sbcset, wc); + } + } +# else /* not RE_ENABLE_I18N */ + { + unsigned int ch; + start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch + : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] + : 0)); + end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch + : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] + : 0)); + if (start_ch > end_ch) + return REG_ERANGE; + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ++ch) + if (start_ch <= ch && ch <= end_ch) + bitset_set (sbcset, ch); + } +# endif /* not RE_ENABLE_I18N */ + return REG_NOERROR; +} +#endif /* not _LIBC */ + +#ifndef _LIBC +/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument since we may update it. 
*/ + +static reg_errcode_t +internal_function +# ifdef RE_ENABLE_I18N +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) +# else /* not RE_ENABLE_I18N */ +build_collating_symbol (bitset_t sbcset, const unsigned char *name) +# endif /* not RE_ENABLE_I18N */ +{ + size_t name_len = strlen ((const char *) name); + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } +} +#endif /* not _LIBC */ + +/* This function parse bracket expression like "[abc]", "[a-c]", + "[[.a-a.]]" etc. */ + +static bin_tree_t * +parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err) +{ +#ifdef _LIBC + const unsigned char *collseqmb; + const char *collseqwc; + uint32_t nrules; + int32_t table_size; + const int32_t *symb_table; + const unsigned char *extra; + + /* Local function for parse_bracket_exp used in _LIBC environement. + Seek the collating symbol entry correspondings to NAME. + Return the index of the symbol in the SYMB_TABLE. */ + + auto inline int32_t + __attribute ((always_inline)) + seek_collating_symbol_entry (name, name_len) + const unsigned char *name; + size_t name_len; + { + int32_t hash = elem_hash ((const char *) name, name_len); + int32_t elem = hash % table_size; + if (symb_table[2 * elem] != 0) + { + int32_t second = hash % (table_size - 2) + 1; + + do + { + /* First compare the hashing value. */ + if (symb_table[2 * elem] == hash + /* Compare the length of the name. */ + && name_len == extra[symb_table[2 * elem + 1]] + /* Compare the name. */ + && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], + name_len) == 0) + { + /* Yep, this is the entry. */ + break; + } + + /* Next entry. */ + elem += second; + } + while (symb_table[2 * elem] != 0); + } + return elem; + } + + /* Local function for parse_bracket_exp used in _LIBC environment. 
+ Look up the collation sequence value of BR_ELEM. + Return the value if succeeded, UINT_MAX otherwise. */ + + auto inline unsigned int + __attribute ((always_inline)) + lookup_collation_sequence_value (br_elem) + bracket_elem_t *br_elem; + { + if (br_elem->type == SB_CHAR) + { + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + return collseqmb[br_elem->opr.ch]; + else + { + wint_t wc = __btowc (br_elem->opr.ch); + return __collseq_table_lookup (collseqwc, wc); + } + } + else if (br_elem->type == MB_CHAR) + { + if (nrules != 0) + return __collseq_table_lookup (collseqwc, br_elem->opr.wch); + } + else if (br_elem->type == COLL_SYM) + { + size_t sym_name_len = strlen ((char *) br_elem->opr.name); + if (nrules != 0) + { + int32_t elem, idx; + elem = seek_collating_symbol_entry (br_elem->opr.name, + sym_name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + /* Skip the byte sequence of the collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the multibyte collation sequence value. */ + idx += sizeof (unsigned int); + /* Skip the wide char sequence of the collating element. */ + idx += sizeof (unsigned int) * + (1 + *(unsigned int *) (extra + idx)); + /* Return the collation sequence value. */ + return *(unsigned int *) (extra + idx); + } + else if (symb_table[2 * elem] == 0 && sym_name_len == 1) + { + /* No valid character. Match it as a single byte + character. */ + return collseqmb[br_elem->opr.name[0]]; + } + } + else if (sym_name_len == 1) + return collseqmb[br_elem->opr.name[0]]; + } + return UINT_MAX; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the range expression which starts from START_ELEM, and ends + at END_ELEM. The result are written to MBCSET and SBCSET. 
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and + mbcset->range_ends, is a pointer argument sinse we may + update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) + re_charset_t *mbcset; + int *range_alloc; + bitset_t sbcset; + bracket_elem_t *start_elem, *end_elem; + { + unsigned int ch; + uint32_t start_collseq; + uint32_t end_collseq; + + /* Equivalence Classes and Character Classes can't be a range + start/end. */ + if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS + || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, + 0)) + return REG_ERANGE; + + start_collseq = lookup_collation_sequence_value (start_elem); + end_collseq = lookup_collation_sequence_value (end_elem); + /* Check start/end collation sequence values. */ + if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) + return REG_ECOLLATE; + if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) + return REG_ERANGE; + + /* Got valid collation sequence values, add them as a new entry. + However, if we have no collation elements, and the character set + is single byte, the single byte character set that we + build below suffices. */ + if (nrules > 0 || dfa->mb_cur_max > 1) + { + /* Check the space of the arrays. */ + if (BE (*range_alloc == mbcset->nranges, 0)) + { + /* There is not enough space, need realloc. */ + uint32_t *new_array_start; + uint32_t *new_array_end; + int new_nranges; + + /* +1 in case of mbcset->nranges is 0. 
*/ + new_nranges = 2 * mbcset->nranges + 1; + new_array_start = re_realloc (mbcset->range_starts, uint32_t, + new_nranges); + new_array_end = re_realloc (mbcset->range_ends, uint32_t, + new_nranges); + + if (BE (new_array_start == NULL || new_array_end == NULL, 0)) + return REG_ESPACE; + + mbcset->range_starts = new_array_start; + mbcset->range_ends = new_array_end; + *range_alloc = new_nranges; + } + + mbcset->range_starts[mbcset->nranges] = start_collseq; + mbcset->range_ends[mbcset->nranges++] = end_collseq; + } + + /* Build the table for single byte characters. */ + for (ch = 0; ch < SBC_MAX; ch++) + { + uint32_t ch_collseq; + /* + if (MB_CUR_MAX == 1) + */ + if (nrules == 0) + ch_collseq = collseqmb[ch]; + else + ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); + if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) + bitset_set (sbcset, ch); + } + return REG_NOERROR; + } + + /* Local function for parse_bracket_exp used in _LIBC environement. + Build the collating element which is represented by NAME. + The result are written to MBCSET and SBCSET. + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a + pointer argument sinse we may update it. */ + + auto inline reg_errcode_t + __attribute ((always_inline)) + build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) + re_charset_t *mbcset; + int *coll_sym_alloc; + bitset_t sbcset; + const unsigned char *name; + { + int32_t elem, idx; + size_t name_len = strlen ((const char *) name); + if (nrules != 0) + { + elem = seek_collating_symbol_entry (name, name_len); + if (symb_table[2 * elem] != 0) + { + /* We found the entry. */ + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element name. */ + idx += 1 + extra[idx]; + } + else if (symb_table[2 * elem] == 0 && name_len == 1) + { + /* No valid character, treat it as a normal + character. 
*/ + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + else + return REG_ECOLLATE; + + /* Got valid collation sequence, add it as a new entry. */ + /* Check the space of the arrays. */ + if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->ncoll_syms is 0. */ + int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + /* Use realloc since mbcset->coll_syms is NULL + if *alloc == 0. */ + int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, + new_coll_sym_alloc); + if (BE (new_coll_syms == NULL, 0)) + return REG_ESPACE; + mbcset->coll_syms = new_coll_syms; + *coll_sym_alloc = new_coll_sym_alloc; + } + mbcset->coll_syms[mbcset->ncoll_syms++] = idx; + return REG_NOERROR; + } + else + { + if (BE (name_len != 1, 0)) + return REG_ECOLLATE; + else + { + bitset_set (sbcset, name[0]); + return REG_NOERROR; + } + } + } +#endif + + re_token_t br_token; + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; + int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; + int equiv_class_alloc = 0, char_class_alloc = 0; +#endif /* not RE_ENABLE_I18N */ + int non_match = 0; + bin_tree_t *work_tree; + int token_len; + int first_round = 1; +#ifdef _LIBC + collseqmb = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules) + { + /* + if (MB_CUR_MAX > 1) + */ + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); + symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_TABLEMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_SYMB_EXTRAMB); + } +#endif + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL 
|| mbcset == NULL, 0)) +#else + if (BE (sbcset == NULL, 0)) +#endif /* RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_NON_MATCH_LIST) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + non_match = 1; + if (syntax & RE_HAT_LISTS_NOT_NEWLINE) + bitset_set (sbcset, '\n'); + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + token_len = peek_token_bracket (token, regexp, syntax); + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_BADPAT; + goto parse_bracket_exp_free_return; + } + } + + /* We treat the first ']' as a normal character. */ + if (token->type == OP_CLOSE_BRACKET) + token->type = CHARACTER; + + while (1) + { + bracket_elem_t start_elem, end_elem; + unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; + unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; + reg_errcode_t ret; + int token_len2 = 0, is_range_exp = 0; + re_token_t token2; + + start_elem.opr.name = start_name_buf; + ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, + syntax, first_round); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + first_round = 0; + + /* Get information about the next token. We need it in any case. */ + token_len = peek_token_bracket (token, regexp, syntax); + + /* Do not check for ranges if we know they are not allowed. */ + if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) + { + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CHARSET_RANGE) + { + re_string_skip_bytes (regexp, token_len); /* Skip '-'. 
*/ + token_len2 = peek_token_bracket (&token2, regexp, syntax); + if (BE (token2.type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token2.type == OP_CLOSE_BRACKET) + { + /* We treat the last '-' as a normal character. */ + re_string_skip_bytes (regexp, -token_len); + token->type = CHARACTER; + } + else + is_range_exp = 1; + } + } + + if (is_range_exp == 1) + { + end_elem.opr.name = end_name_buf; + ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, + dfa, syntax, 1); + if (BE (ret != REG_NOERROR, 0)) + { + *err = ret; + goto parse_bracket_exp_free_return; + } + + token_len = peek_token_bracket (token, regexp, syntax); + +#ifdef _LIBC + *err = build_range_exp (sbcset, mbcset, &range_alloc, + &start_elem, &end_elem); +#else +# ifdef RE_ENABLE_I18N + *err = build_range_exp (sbcset, + dfa->mb_cur_max > 1 ? mbcset : NULL, + &range_alloc, &start_elem, &end_elem); +# else + *err = build_range_exp (sbcset, &start_elem, &end_elem); +# endif +#endif /* RE_ENABLE_I18N */ + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + } + else + { + switch (start_elem.type) + { + case SB_CHAR: + bitset_set (sbcset, start_elem.opr.ch); + break; +#ifdef RE_ENABLE_I18N + case MB_CHAR: + /* Check whether the array has enough space. */ + if (BE (mbchar_alloc == mbcset->nmbchars, 0)) + { + wchar_t *new_mbchars; + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nmbchars is 0. */ + mbchar_alloc = 2 * mbcset->nmbchars + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + new_mbchars = re_realloc (mbcset->mbchars, wchar_t, + mbchar_alloc); + if (BE (new_mbchars == NULL, 0)) + goto parse_bracket_exp_espace; + mbcset->mbchars = new_mbchars; + } + mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; + break; +#endif /* RE_ENABLE_I18N */ + case EQUIV_CLASS: + *err = build_equiv_class (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &equiv_class_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case COLL_SYM: + *err = build_collating_symbol (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &coll_sym_alloc, +#endif /* RE_ENABLE_I18N */ + start_elem.opr.name); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + case CHAR_CLASS: + *err = build_charclass (regexp->trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &char_class_alloc, +#endif /* RE_ENABLE_I18N */ + (const char *) start_elem.opr.name, syntax); + if (BE (*err != REG_NOERROR, 0)) + goto parse_bracket_exp_free_return; + break; + default: + assert (0); + break; + } + } + if (BE (token->type == END_OF_RE, 0)) + { + *err = REG_EBRACK; + goto parse_bracket_exp_free_return; + } + if (token->type == OP_CLOSE_BRACKET) + break; + } + + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); + + if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes + || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes + || mbcset->non_match))) + { + bin_tree_t *mbc_tree; + int sbc_idx; + /* Build a tree for complex bracket. 
*/ + dfa->has_mb_node = 1; + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto parse_bracket_exp_espace; + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) + if (sbcset[sbc_idx]) + break; + /* If there are no bits set in sbcset, there is no point + of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ + if (sbc_idx < BITSET_WORDS) + { + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + + /* Then join them by ALT node. */ + work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + else + { + re_free (sbcset); + work_tree = mbc_tree; + } + } + else +#endif /* not RE_ENABLE_I18N */ + { +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + return work_tree; + + parse_bracket_exp_espace: + *err = REG_ESPACE; + parse_bracket_exp_free_return: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + return NULL; +} + +/* Parse an element in the bracket expression. 
*/ + +static reg_errcode_t +parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token, int token_len, re_dfa_t *dfa, + reg_syntax_t syntax, int accept_hyphen) +{ +#ifdef RE_ENABLE_I18N + int cur_char_size; + cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); + if (cur_char_size > 1) + { + elem->type = MB_CHAR; + elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); + re_string_skip_bytes (regexp, cur_char_size); + return REG_NOERROR; + } +#endif /* RE_ENABLE_I18N */ + re_string_skip_bytes (regexp, token_len); /* Skip a token. */ + if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS + || token->type == OP_OPEN_EQUIV_CLASS) + return parse_bracket_symbol (elem, regexp, token); + if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) + { + /* A '-' must only appear as anything but a range indicator before + the closing bracket. Everything else is an error. */ + re_token_t token2; + (void) peek_token_bracket (&token2, regexp, syntax); + if (token2.type != OP_CLOSE_BRACKET) + /* The actual error value is not standardized since this whole + case is undefined. But ERANGE makes good sense. */ + return REG_ERANGE; + } + elem->type = SB_CHAR; + elem->opr.ch = token->opr.c; + return REG_NOERROR; +} + +/* Parse a bracket symbol in the bracket expression. Bracket symbols are + such as [:<character_class>:], [.<collating_element>.], and + [=<equivalent_class>=]. 
*/ + +static reg_errcode_t +parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token) +{ + unsigned char ch, delim = token->opr.c; + int i = 0; + if (re_string_eoi(regexp)) + return REG_EBRACK; + for (;; ++i) + { + if (i >= BRACKET_NAME_BUF_SIZE) + return REG_EBRACK; + if (token->type == OP_OPEN_CHAR_CLASS) + ch = re_string_fetch_byte_case (regexp); + else + ch = re_string_fetch_byte (regexp); + if (re_string_eoi(regexp)) + return REG_EBRACK; + if (ch == delim && re_string_peek_byte (regexp, 0) == ']') + break; + elem->opr.name[i] = ch; + } + re_string_skip_bytes (regexp, 1); + elem->opr.name[i] = '\0'; + switch (token->type) + { + case OP_OPEN_COLL_ELEM: + elem->type = COLL_SYM; + break; + case OP_OPEN_EQUIV_CLASS: + elem->type = EQUIV_CLASS; + break; + case OP_OPEN_CHAR_CLASS: + elem->type = CHAR_CLASS; + break; + default: + break; + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the equivalence class which is represented by NAME. + The result are written to MBCSET and SBCSET. + EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, + int *equiv_class_alloc, const unsigned char *name) +#else /* not RE_ENABLE_I18N */ +build_equiv_class (bitset_t sbcset, const unsigned char *name) +#endif /* not RE_ENABLE_I18N */ +{ +#ifdef _LIBC + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + const int32_t *table, *indirect; + const unsigned char *weights, *extra, *cp; + unsigned char char_buf[2]; + int32_t idx1, idx2; + unsigned int ch; + size_t len; + /* This #include defines a local function! */ +# include <locale/weight.h> + /* Calculate the index for equivalence class. 
*/ + cp = name; + table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_INDIRECTMB); + idx1 = findidx (&cp); + if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) + /* This isn't a valid character. */ + return REG_ECOLLATE; + + /* Build single byte matcing table for this equivalence class. */ + char_buf[1] = (unsigned char) '\0'; + len = weights[idx1 & 0xffffff]; + for (ch = 0; ch < SBC_MAX; ++ch) + { + char_buf[0] = ch; + cp = char_buf; + idx2 = findidx (&cp); +/* + idx2 = table[ch]; +*/ + if (idx2 == 0) + /* This isn't a valid character. */ + continue; + /* Compare only if the length matches and the collation rule + index is the same. */ + if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)) + { + int cnt = 0; + + while (cnt <= len && + weights[(idx1 & 0xffffff) + 1 + cnt] + == weights[(idx2 & 0xffffff) + 1 + cnt]) + ++cnt; + + if (cnt > len) + bitset_set (sbcset, ch); + } + } + /* Check whether the array has enough space. */ + if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nequiv_classes is 0. */ + int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + /* Use realloc since the array is NULL if *alloc == 0. 
*/ + int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, + int32_t, + new_equiv_class_alloc); + if (BE (new_equiv_classes == NULL, 0)) + return REG_ESPACE; + mbcset->equiv_classes = new_equiv_classes; + *equiv_class_alloc = new_equiv_class_alloc; + } + mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; + } + else +#endif /* _LIBC */ + { + if (BE (strlen ((const char *) name) != 1, 0)) + return REG_ECOLLATE; + bitset_set (sbcset, *name); + } + return REG_NOERROR; +} + + /* Helper function for parse_bracket_exp. + Build the character class which is represented by NAME. + The result are written to MBCSET and SBCSET. + CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, + is a pointer argument sinse we may update it. */ + +static reg_errcode_t +#ifdef RE_ENABLE_I18N +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, + const char *class_name, reg_syntax_t syntax) +#else /* not RE_ENABLE_I18N */ +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + const char *class_name, reg_syntax_t syntax) +#endif /* not RE_ENABLE_I18N */ +{ + int i; + + /* In case of REG_ICASE "upper" and "lower" match the both of + upper and lower cases. */ + if ((syntax & RE_ICASE) + && (strcmp (class_name, "upper") == 0 || strcmp (class_name, "lower") == 0)) + class_name = "alpha"; + +#ifdef RE_ENABLE_I18N + /* Check the space of the arrays. */ + if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) + { + /* Not enough, realloc it. */ + /* +1 in case of mbcset->nchar_classes is 0. */ + int new_char_class_alloc = 2 * mbcset->nchar_classes + 1; + /* Use realloc since array is NULL if *alloc == 0. 
*/ + wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, + new_char_class_alloc); + if (BE (new_char_classes == NULL, 0)) + return REG_ESPACE; + mbcset->char_classes = new_char_classes; + *char_class_alloc = new_char_class_alloc; + } + mbcset->char_classes[mbcset->nchar_classes++] = __wctype (class_name); +#endif /* RE_ENABLE_I18N */ + +#define BUILD_CHARCLASS_LOOP(ctype_func) \ + do { \ + if (BE (trans != NULL, 0)) \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, trans[i]); \ + } \ + else \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, i); \ + } \ + } while (0) + + if (strcmp (class_name, "alnum") == 0) + BUILD_CHARCLASS_LOOP (isalnum); + else if (strcmp (class_name, "cntrl") == 0) + BUILD_CHARCLASS_LOOP (iscntrl); + else if (strcmp (class_name, "lower") == 0) + BUILD_CHARCLASS_LOOP (islower); + else if (strcmp (class_name, "space") == 0) + BUILD_CHARCLASS_LOOP (isspace); + else if (strcmp (class_name, "alpha") == 0) + BUILD_CHARCLASS_LOOP (isalpha); + else if (strcmp (class_name, "digit") == 0) + BUILD_CHARCLASS_LOOP (isdigit); + else if (strcmp (class_name, "print") == 0) + BUILD_CHARCLASS_LOOP (isprint); + else if (strcmp (class_name, "upper") == 0) + BUILD_CHARCLASS_LOOP (isupper); + else if (strcmp (class_name, "blank") == 0) +#ifndef GAWK + BUILD_CHARCLASS_LOOP (isblank); +#else + /* see comments above */ + BUILD_CHARCLASS_LOOP (is_blank); +#endif + else if (strcmp (class_name, "graph") == 0) + BUILD_CHARCLASS_LOOP (isgraph); + else if (strcmp (class_name, "punct") == 0) + BUILD_CHARCLASS_LOOP (ispunct); + else if (strcmp (class_name, "xdigit") == 0) + BUILD_CHARCLASS_LOOP (isxdigit); + else + return REG_ECTYPE; + + return REG_NOERROR; +} + +static bin_tree_t * +build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, + const char *class_name, + const char *extra, int non_match, + reg_errcode_t *err) +{ + re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N + 
re_charset_t *mbcset; + int alloc = 0; +#endif /* not RE_ENABLE_I18N */ + reg_errcode_t ret; + re_token_t br_token; + bin_tree_t *tree; + + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); +#ifdef RE_ENABLE_I18N + mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ + +#ifdef RE_ENABLE_I18N + if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else /* not RE_ENABLE_I18N */ + if (BE (sbcset == NULL, 0)) +#endif /* not RE_ENABLE_I18N */ + { + *err = REG_ESPACE; + return NULL; + } + + if (non_match) + { +#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; +#endif /* not RE_ENABLE_I18N */ + } + + /* We don't care the syntax in this case. */ + ret = build_charclass (trans, sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &alloc, +#endif /* RE_ENABLE_I18N */ + class_name, 0); + + if (BE (ret != REG_NOERROR, 0)) + { + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = ret; + return NULL; + } + /* \w match '_' also. */ + for (; *extra; extra++) + bitset_set (sbcset, *extra); + + /* If it is non-matching list. */ + if (non_match) + bitset_not (sbcset); + +#ifdef RE_ENABLE_I18N + /* Ensure only single byte characters are set. */ + if (dfa->mb_cur_max > 1) + bitset_mask (sbcset, dfa->sb_char); +#endif + + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (tree == NULL, 0)) + goto build_word_op_espace; + +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + { + bin_tree_t *mbc_tree; + /* Build a tree for complex bracket. */ + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + dfa->has_mb_node = 1; + mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto build_word_op_espace; + /* Then join them by ALT node. 
*/ + tree = create_tree (dfa, tree, mbc_tree, OP_ALT); + if (BE (mbc_tree != NULL, 1)) + return tree; + } + else + { + free_charset (mbcset); + return tree; + } +#else /* not RE_ENABLE_I18N */ + return tree; +#endif /* not RE_ENABLE_I18N */ + + build_word_op_espace: + re_free (sbcset); +#ifdef RE_ENABLE_I18N + free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ + *err = REG_ESPACE; + return NULL; +} + +/* This is intended for the expressions like "a{1,3}". + Fetch a number from `input', and return the number. + Return -1, if the number field is empty like "{,1}". + Return -2, If an error is occured. */ + +static int +fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) +{ + int num = -1; + unsigned char c; + while (1) + { + fetch_token (token, input, syntax); + c = token->opr.c; + if (BE (token->type == END_OF_RE, 0)) + return -2; + if (token->type == OP_CLOSE_DUP_NUM || c == ',') + break; + num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) + ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); + num = (num > RE_DUP_MAX) ? -2 : num; + } + return num; +} + +#ifdef RE_ENABLE_I18N +static void +free_charset (re_charset_t *cset) +{ + re_free (cset->mbchars); +# ifdef _LIBC + re_free (cset->coll_syms); + re_free (cset->equiv_classes); + re_free (cset->range_starts); + re_free (cset->range_ends); +# endif + re_free (cset->char_classes); + re_free (cset); +} +#endif /* RE_ENABLE_I18N */ + +/* Functions for binary tree operation. */ + +/* Create a tree node. 
*/ + +static bin_tree_t * +create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + re_token_type_t type) +{ + re_token_t t; + t.type = type; + return create_token_tree (dfa, left, right, &t); +} + +static bin_tree_t * +create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + const re_token_t *token) +{ + bin_tree_t *tree; + if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) + { + bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); + + if (storage == NULL) + return NULL; + storage->next = dfa->str_tree_storage; + dfa->str_tree_storage = storage; + dfa->str_tree_storage_idx = 0; + } + tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; + + tree->parent = NULL; + tree->left = left; + tree->right = right; + tree->token = *token; + tree->token.duplicated = 0; + tree->token.opt_subexp = 0; + tree->first = NULL; + tree->next = NULL; + tree->node_idx = -1; + + if (left != NULL) + left->parent = tree; + if (right != NULL) + right->parent = tree; + return tree; +} + +/* Mark the tree SRC as an optional subexpression. + To be called from preorder or postorder. */ + +static reg_errcode_t +mark_opt_subexp (void *extra, bin_tree_t *node) +{ + int idx = (int) (long) extra; + if (node->token.type == SUBEXP && node->token.opr.idx == idx) + node->token.opt_subexp = 1; + + return REG_NOERROR; +} + +/* Free the allocated memory inside NODE. */ + +static void +free_token (re_token_t *node) +{ +#ifdef RE_ENABLE_I18N + if (node->type == COMPLEX_BRACKET && node->duplicated == 0) + free_charset (node->opr.mbcset); + else +#endif /* RE_ENABLE_I18N */ + if (node->type == SIMPLE_BRACKET && node->duplicated == 0) + re_free (node->opr.sbcset); +} + +/* Worker function for tree walking. Free the allocated memory inside NODE + and its children. */ + +static reg_errcode_t +free_tree (void *extra, bin_tree_t *node) +{ + free_token (&node->token); + return REG_NOERROR; +} + + +/* Duplicate the node SRC, and return new node. 
This is a preorder + visit similar to the one implemented by the generic visitor, but + we need more infrastructure to maintain two parallel trees --- so, + it's easier to duplicate. */ + +static bin_tree_t * +duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) +{ + const bin_tree_t *node; + bin_tree_t *dup_root; + bin_tree_t **p_new = &dup_root, *dup_node = root->parent; + + for (node = root; ; ) + { + /* Create a new tree and link it back to the current parent. */ + *p_new = create_token_tree (dfa, NULL, NULL, &node->token); + if (*p_new == NULL) + return NULL; + (*p_new)->parent = dup_node; + (*p_new)->token.duplicated = 1; + dup_node = *p_new; + + /* Go to the left node, or up and to the right. */ + if (node->left) + { + node = node->left; + p_new = &dup_node->left; + } + else + { + const bin_tree_t *prev = NULL; + while (node->right == prev || node->right == NULL) + { + prev = node; + node = node->parent; + dup_node = dup_node->parent; + if (!node) + return dup_root; + } + node = node->right; + p_new = &dup_node->right; + } + } +} diff --git a/compat/regex/regex.c b/compat/regex/regex.c index be851fc502..3dd8dfa01f 100644 --- a/compat/regex/regex.c +++ b/compat/regex/regex.c @@ -1,4924 +1,87 @@ -/* Extended regular expression matching and search library, - version 0.12. - (Implements POSIX draft P10003.2/D11.2, except for - internationalization features.) +/* Extended regular expression matching and search library. + Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - Copyright (C) 1993 Free Software Foundation, Inc. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
- This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* AIX requires this to be the first thing in the file. */ -#if defined (_AIX) && !defined (REGEX_MALLOC) - #pragma alloca -#endif - -#define _GNU_SOURCE - -/* We need this for `regex.h', and perhaps for the Emacs include files. */ -#include <sys/types.h> - -/* We used to test for `BSTRING' here, but only GCC and Emacs define - `BSTRING', as far as I know, and neither of them use this code. */ -#include <string.h> -#ifndef bcmp -#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) -#endif -#ifndef bcopy -#define bcopy(s, d, n) memcpy ((d), (s), (n)) -#endif -#ifndef bzero -#define bzero(s, n) memset ((s), 0, (n)) -#endif - -#include <stdlib.h> - - -/* Define the syntax stuff for \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -#ifndef Sword -#define Sword 1 -#endif - -#ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -#else /* not SYNTAX_TABLE */ - -/* How many characters in the character set. 
*/ -#define CHAR_SET_SIZE 256 - -static char re_syntax_table[CHAR_SET_SIZE]; - -static void -init_syntax_once () -{ - register int c; - static int done = 0; - - if (done) - return; - - bzero (re_syntax_table, sizeof re_syntax_table); - - for (c = 'a'; c <= 'z'; c++) - re_syntax_table[c] = Sword; - - for (c = 'A'; c <= 'Z'; c++) - re_syntax_table[c] = Sword; - - for (c = '0'; c <= '9'; c++) - re_syntax_table[c] = Sword; - - re_syntax_table['_'] = Sword; - - done = 1; -} - -#endif /* not SYNTAX_TABLE */ - -#define SYNTAX(c) re_syntax_table[c] - - -/* Get the interface, including the syntax bits. */ -#include "regex.h" - -/* isalpha etc. are used for the character classes. */ -#include <ctype.h> - -#ifndef isascii -#define isascii(c) 1 -#endif - -#ifdef isblank -#define ISBLANK(c) (isascii (c) && isblank (c)) -#else -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -#define ISGRAPH(c) (isascii (c) && isgraph (c)) -#else -#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) -#endif - -#define ISPRINT(c) (isascii (c) && isprint (c)) -#define ISDIGIT(c) (isascii (c) && isdigit (c)) -#define ISALNUM(c) (isascii (c) && isalnum (c)) -#define ISALPHA(c) (isascii (c) && isalpha (c)) -#define ISCNTRL(c) (isascii (c) && iscntrl (c)) -#define ISLOWER(c) (isascii (c) && islower (c)) -#define ISPUNCT(c) (isascii (c) && ispunct (c)) -#define ISSPACE(c) (isascii (c) && isspace (c)) -#define ISUPPER(c) (isascii (c) && isupper (c)) -#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) - -#ifndef NULL -#define NULL 0 -#endif - -/* We remove any previous definition of `SIGN_EXTEND_CHAR', - since ours (we hope) works properly with all combinations of - machines, compilers, `char' and `unsigned char' argument types. - (Per Bothner suggested the basic approach.) */ -#undef SIGN_EXTEND_CHAR -#if __STDC__ -#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -#else /* not __STDC__ */ -/* As in Harbison and Steele. 
*/ -#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -#endif - -/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we - use `alloca' instead of `malloc'. This is because using malloc in - re_search* or re_match* could cause memory leaks when C-g is used in - Emacs; also, malloc is slower and causes storage fragmentation. On - the other hand, malloc is more portable, and easier to debug. - - Because we sometimes use alloca, some routines have to be macros, - not functions -- `alloca'-allocated space disappears at the end of the - function it is called in. */ - -#ifdef REGEX_MALLOC - -#define REGEX_ALLOCATE malloc -#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) - -#else /* not REGEX_MALLOC */ - -/* Emacs already defines alloca, sometimes. */ -#ifndef alloca - -/* Make alloca work the best possible way. */ -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not __GNUC__ */ -#if HAVE_ALLOCA_H -#include <alloca.h> -#else /* not __GNUC__ or HAVE_ALLOCA_H */ -#ifndef _AIX /* Already did AIX, up at the top. */ -char *alloca (); -#endif /* not _AIX */ -#endif /* not HAVE_ALLOCA_H */ -#endif /* not __GNUC__ */ - -#endif /* not alloca */ - -#define REGEX_ALLOCATE alloca - -/* Assumes a `char *destination' variable. */ -#define REGEX_REALLOCATE(source, osize, nsize) \ - (destination = (char *) alloca (nsize), \ - bcopy (source, destination, osize), \ - destination) - -#endif /* not REGEX_MALLOC */ - - -/* True if `size1' is non-NULL and PTR is pointing anywhere inside - `string1' or just past its end. This works if PTR is NULL, which is - a good thing. */ -#define FIRST_STRING_P(ptr) \ - (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) - -/* (Re)Allocate N items of type T using malloc, or fail. 
*/ -#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) - -#define BYTEWIDTH 8 /* In bits. */ - -#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) - -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -typedef char boolean; -#define false 0 -#define true 1 - -/* These are the command codes that appear in compiled regular - expressions. Some opcodes are followed by argument bytes. A - command code can specify any interpretation whatsoever for its - arguments. Zero bytes may appear in the compiled regular expression. - - The value of `exactn' is needed in search.c (search_buffer) in Emacs. - So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of - `exactn' we use here must also be 1. */ - -typedef enum -{ - no_op = 0, - - /* Followed by one byte giving n, then by n literal bytes. */ - exactn = 1, - - /* Matches any (more or less) character. */ - anychar, - - /* Matches any one char belonging to specified set. First - following byte is number of bitmap bytes. Then come bytes - for a bitmap saying which chars are in. Bits in each byte - are ordered low-bit-first. A character is in the set if its - bit is 1. A character too large to have a bit in the map is - automatically not in the set. */ - charset, - - /* Same parameters as charset, but match any character that is - not one of those specified. */ - charset_not, - - /* Start remembering the text that is matched, for storing in a - register. Followed by one byte with the register number, in - the range 0 to one less than the pattern buffer's re_nsub - field. Then followed by one byte with the number of groups - inner to this one. (This last has to be part of the - start_memory only because we need it in the on_failure_jump - of re_match_2.) 
*/ - start_memory, - - /* Stop remembering the text that is matched and store it in a - memory register. Followed by one byte with the register - number, in the range 0 to one less than `re_nsub' in the - pattern buffer, and one byte with the number of inner groups, - just like `start_memory'. (We need the number of inner - groups here because we don't have any easy way of finding the - corresponding start_memory when we're at a stop_memory.) */ - stop_memory, - - /* Match a duplicate of something remembered. Followed by one - byte containing the register number. */ - duplicate, - - /* Fail unless at beginning of line. */ - begline, - - /* Fail unless at end of line. */ - endline, - - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). */ - begbuf, - - /* Analogously, for end of buffer/string. */ - endbuf, - - /* Followed by two byte relative address to which to jump. */ - jump, - - /* Same as jump, but marks the end of an alternative. */ - jump_past_alt, - - /* Followed by two-byte relative address of place to resume at - in case of failure. */ - on_failure_jump, - - /* Like on_failure_jump, but pushes a placeholder instead of the - current string position when executed. */ - on_failure_keep_string_jump, - - /* Throw away latest failure point and then jump to following - two-byte relative address. */ - pop_failure_jump, - - /* Change to pop_failure_jump if know won't have to backtrack to - match; otherwise change to jump. This is used to jump - back to the beginning of a repeat. If what follows this jump - clearly won't match what the repeat does, such that we can be - sure that there is no use backtracking out of repetitions - already matched, then we change it to a pop_failure_jump. - Followed by two-byte address. */ - maybe_pop_jump, - - /* Jump to following two-byte address, and push a dummy failure - point. This failure point will be thrown away if an attempt - is made to use it for a failure. 
A `+' construct makes this - before the first repeat. Also used as an intermediary kind - of jump when compiling an alternative. */ - dummy_failure_jump, - - /* Push a dummy failure point and continue. Used at the end of - alternatives. */ - push_dummy_failure, - - /* Followed by two-byte relative address and two-byte number n. - After matching N times, jump to the address upon failure. */ - succeed_n, - - /* Followed by two-byte relative address, and two-byte number n. - Jump to the address N times, then fail. */ - jump_n, - - /* Set the following two-byte relative address to the - subsequent two-byte number. The address *includes* the two - bytes of number. */ - set_number_at, - - wordchar, /* Matches any word-constituent character. */ - notwordchar, /* Matches any char that is not a word-constituent. */ - - wordbeg, /* Succeeds if at word beginning. */ - wordend, /* Succeeds if at word end. */ - - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ - -#ifdef emacs - ,before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. */ - - /* Matches any character whose syntax is specified. Followed by - a byte which contains a syntax code, e.g., Sword. */ - syntaxspec, - - /* Matches any character whose syntax is not that specified. */ - notsyntaxspec -#endif /* emacs */ -} re_opcode_t; - -/* Common operations on the compiled pattern. */ - -/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ - -#define STORE_NUMBER(destination, number) \ - do { \ - (destination)[0] = (number) & 0377; \ - (destination)[1] = (number) >> 8; \ - } while (0) - -/* Same as STORE_NUMBER, except increment DESTINATION to - the byte after where the number is stored. Therefore, DESTINATION - must be an lvalue. 
*/ - -#define STORE_NUMBER_AND_INCR(destination, number) \ - do { \ - STORE_NUMBER (destination, number); \ - (destination) += 2; \ - } while (0) - -/* Put into DESTINATION a number stored in two contiguous bytes starting - at SOURCE. */ - -#define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source) & 0377; \ - (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ - } while (0) - -#ifdef DEBUG -static void -extract_number (dest, source) - int *dest; - unsigned char *source; -{ - int temp = SIGN_EXTEND_CHAR (*(source + 1)); - *dest = *source & 0377; - *dest += temp << 8; -} - -#ifndef EXTRACT_MACROS /* To debug the macros. */ -#undef EXTRACT_NUMBER -#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. - SOURCE must be an lvalue. */ - -#define EXTRACT_NUMBER_AND_INCR(destination, source) \ - do { \ - EXTRACT_NUMBER (destination, source); \ - (source) += 2; \ - } while (0) - -#ifdef DEBUG -static void -extract_number_and_incr (destination, source) - int *destination; - unsigned char **source; -{ - extract_number (destination, *source); - *source += 2; -} - -#ifndef EXTRACT_MACROS -#undef EXTRACT_NUMBER_AND_INCR -#define EXTRACT_NUMBER_AND_INCR(dest, src) \ - extract_number_and_incr (&dest, &src) -#endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* If DEBUG is defined, Regex prints many voluminous messages about what - it is doing (if the variable `debug' is nonzero). If linked with the - main program in `iregex.c', you can enter patterns and strings - interactively. And if linked with the main program in `main.c' and - the other test files, you can run the already-written tests. */ - -#ifdef DEBUG - -/* We use standard I/O for debugging. */ -#include <stdio.h> - -/* It is useful to test things that ``must'' be true when debugging. 
*/ -#include <assert.h> - -static int debug = 0; - -#define DEBUG_STATEMENT(e) e -#define DEBUG_PRINT1(x) if (debug) printf (x) -#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) print_partial_compiled_pattern (s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) print_double_string (w, s1, sz1, s2, sz2) - - -extern void printchar (); - -/* Print the fastmap in human-readable form. */ - -void -print_fastmap (fastmap) - char *fastmap; -{ - unsigned was_a_range = 0; - unsigned i = 0; - - while (i < (1 << BYTEWIDTH)) - { - if (fastmap[i++]) - { - was_a_range = 0; - printchar (i - 1); - while (i < (1 << BYTEWIDTH) && fastmap[i]) - { - was_a_range = 1; - i++; - } - if (was_a_range) - { - printf ("-"); - printchar (i - 1); - } - } - } - putchar ('\n'); -} - - -/* Print a compiled pattern string in human-readable form, starting at - the START pointer into it and ending just before the pointer END. */ - -void -print_partial_compiled_pattern (start, end) - unsigned char *start; - unsigned char *end; -{ - int mcnt, mcnt2; - unsigned char *p = start; - unsigned char *pend = end; - - if (start == NULL) - { - printf ("(null)\n"); - return; - } - - /* Loop over pattern commands. 
*/ - while (p < pend) - { - switch ((re_opcode_t) *p++) - { - case no_op: - printf ("/no_op"); - break; - - case exactn: - mcnt = *p++; - printf ("/exactn/%d", mcnt); - do - { - putchar ('/'); - printchar (*p++); - } - while (--mcnt); - break; - - case start_memory: - mcnt = *p++; - printf ("/start_memory/%d/%d", mcnt, *p++); - break; - - case stop_memory: - mcnt = *p++; - printf ("/stop_memory/%d/%d", mcnt, *p++); - break; - - case duplicate: - printf ("/duplicate/%d", *p++); - break; - - case anychar: - printf ("/anychar"); - break; - - case charset: - case charset_not: - { - register int c; - - printf ("/charset%s", - (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); - - assert (p + *p < pend); - - for (c = 0; c < *p; c++) - { - unsigned bit; - unsigned char map_byte = p[1 + c]; - - putchar ('/'); - - for (bit = 0; bit < BYTEWIDTH; bit++) - if (map_byte & (1 << bit)) - printchar (c * BYTEWIDTH + bit); - } - p += 1 + *p; - break; - } - - case begline: - printf ("/begline"); - break; - - case endline: - printf ("/endline"); - break; - - case on_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_jump/0/%d", mcnt); - break; - - case on_failure_keep_string_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_keep_string_jump/0/%d", mcnt); - break; - - case dummy_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/dummy_failure_jump/0/%d", mcnt); - break; - - case push_dummy_failure: - printf ("/push_dummy_failure"); - break; - - case maybe_pop_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/maybe_pop_jump/0/%d", mcnt); - break; - - case pop_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/pop_failure_jump/0/%d", mcnt); - break; - - case jump_past_alt: - extract_number_and_incr (&mcnt, &p); - printf ("/jump_past_alt/0/%d", mcnt); - break; - - case jump: - extract_number_and_incr (&mcnt, &p); - printf ("/jump/0/%d", mcnt); - break; - - case succeed_n: - extract_number_and_incr (&mcnt, 
&p); - extract_number_and_incr (&mcnt2, &p); - printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2); - break; - - case jump_n: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2); - break; - - case set_number_at: - extract_number_and_incr (&mcnt, &p); - extract_number_and_incr (&mcnt2, &p); - printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2); - break; - - case wordbound: - printf ("/wordbound"); - break; - - case notwordbound: - printf ("/notwordbound"); - break; - - case wordbeg: - printf ("/wordbeg"); - break; - - case wordend: - printf ("/wordend"); - -#ifdef emacs - case before_dot: - printf ("/before_dot"); - break; - - case at_dot: - printf ("/at_dot"); - break; - - case after_dot: - printf ("/after_dot"); - break; - - case syntaxspec: - printf ("/syntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; - - case notsyntaxspec: - printf ("/notsyntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; -#endif /* emacs */ - - case wordchar: - printf ("/wordchar"); - break; - - case notwordchar: - printf ("/notwordchar"); - break; - - case begbuf: - printf ("/begbuf"); - break; - - case endbuf: - printf ("/endbuf"); - break; - - default: - printf ("?%d", *(p-1)); - } - } - printf ("/\n"); -} - - -void -print_compiled_pattern (bufp) - struct re_pattern_buffer *bufp; -{ - unsigned char *buffer = bufp->buffer; - - print_partial_compiled_pattern (buffer, buffer + bufp->used); - printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); - - if (bufp->fastmap_accurate && bufp->fastmap) - { - printf ("fastmap: "); - print_fastmap (bufp->fastmap); - } - - printf ("re_nsub: %d\t", bufp->re_nsub); - printf ("regs_alloc: %d\t", bufp->regs_allocated); - printf ("can_be_null: %d\t", bufp->can_be_null); - printf ("newline_anchor: %d\n", bufp->newline_anchor); - printf ("no_sub: %d\t", bufp->no_sub); - printf ("not_bol: %d\t", bufp->not_bol); - printf ("not_eol: %d\t", bufp->not_eol); - 
printf ("syntax: %d\n", bufp->syntax); - /* Perhaps we should print the translate table? */ -} - - -void -print_double_string (where, string1, size1, string2, size2) - const char *where; - const char *string1; - const char *string2; - int size1; - int size2; -{ - unsigned this_char; - - if (where == NULL) - printf ("(null)"); - else - { - if (FIRST_STRING_P (where)) - { - for (this_char = where - string1; this_char < size1; this_char++) - printchar (string1[this_char]); - - where = string2; - } - - for (this_char = where - string2; this_char < size2; this_char++) - printchar (string2[this_char]); - } -} - -#else /* not DEBUG */ - -#undef assert -#define assert(e) - -#define DEBUG_STATEMENT(e) -#define DEBUG_PRINT1(x) -#define DEBUG_PRINT2(x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) - -#endif /* not DEBUG */ - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; - return ret; -} - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. 
*/ - -static const char *re_error_msg[] = - { NULL, /* REG_NOERROR */ - "No match", /* REG_NOMATCH */ - "Invalid regular expression", /* REG_BADPAT */ - "Invalid collation character", /* REG_ECOLLATE */ - "Invalid character class name", /* REG_ECTYPE */ - "Trailing backslash", /* REG_EESCAPE */ - "Invalid back reference", /* REG_ESUBREG */ - "Unmatched [ or [^", /* REG_EBRACK */ - "Unmatched ( or \\(", /* REG_EPAREN */ - "Unmatched \\{", /* REG_EBRACE */ - "Invalid content of \\{\\}", /* REG_BADBR */ - "Invalid range end", /* REG_ERANGE */ - "Memory exhausted", /* REG_ESPACE */ - "Invalid preceding regular expression", /* REG_BADRPT */ - "Premature end of regular expression", /* REG_EEND */ - "Regular expression too big", /* REG_ESIZE */ - "Unmatched ) or \\)", /* REG_ERPAREN */ - }; - -/* Subroutine declarations and macros for regex_compile. */ - -static void store_op1 (), store_op2 (); -static void insert_op1 (), insert_op2 (); -static boolean at_begline_loc_p (), at_endline_loc_p (); -static boolean group_in_compile_stack (); -static reg_errcode_t compile_range (); - -/* Fetch the next character in the uncompiled pattern---translating it - if necessary. Also cast from a signed character in the constant - string passed to us by the user to an unsigned char that we can use - as an array index (in, e.g., `translate'). */ -#define PATFETCH(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - if (translate) c = translate[c]; \ - } while (0) - -/* Fetch the next character in the uncompiled pattern, with no - translation. */ -#define PATFETCH_RAW(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - } while (0) - -/* Go backwards one character in the pattern. */ -#define PATUNFETCH p-- - - -/* If `translate' is non-null, return translate[D], else just D. We - cast the subscript to translate because some data is declared as - `char *', to avoid warnings when a string constant is passed. 
But - when we use a character as a subscript we must make it unsigned. */ -#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) - - -/* Macros for outputting the compiled pattern into `buffer'. */ - -/* If the buffer isn't allocated when it comes in, use this. */ -#define INIT_BUF_SIZE 32 - -/* Make sure we have at least N more bytes of space in buffer. */ -#define GET_BUFFER_SPACE(n) \ - while (b - bufp->buffer + (n) > bufp->allocated) \ - EXTEND_BUFFER () - -/* Make sure we have one more byte of buffer space and then add C to it. */ -#define BUF_PUSH(c) \ - do { \ - GET_BUFFER_SPACE (1); \ - *b++ = (unsigned char) (c); \ - } while (0) - - -/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ -#define BUF_PUSH_2(c1, c2) \ - do { \ - GET_BUFFER_SPACE (2); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - } while (0) - - -/* As with BUF_PUSH_2, except for three bytes. */ -#define BUF_PUSH_3(c1, c2, c3) \ - do { \ - GET_BUFFER_SPACE (3); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - *b++ = (unsigned char) (c3); \ - } while (0) - - -/* Store a jump with opcode OP at LOC to location TO. We store a - relative address offset by the three bytes the jump itself occupies. */ -#define STORE_JUMP(op, loc, to) \ - store_op1 (op, loc, (to) - (loc) - 3) - -/* Likewise, for a two-argument jump. */ -#define STORE_JUMP2(op, loc, to, arg) \ - store_op2 (op, loc, (to) - (loc) - 3, arg) - -/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP(op, loc, to) \ - insert_op1 (op, loc, (to) - (loc) - 3, b) - -/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP2(op, loc, to, arg) \ - insert_op2 (op, loc, (to) - (loc) - 3, arg, b) - - -/* This is not an arbitrary limit: the arguments which represent offsets - into the pattern are two bytes long. 
So if 2^16 bytes turns out to - be too small, many things would have to change. */ -#define MAX_BUF_SIZE (1L << 16) - - -/* Extend the buffer by twice its current size via realloc and - reset the pointers that pointed into the old block to point to the - correct places in the new one. If extending the buffer results in it - being larger than MAX_BUF_SIZE, then flag memory exhausted. */ -#define EXTEND_BUFFER() \ - do { \ - unsigned char *old_buffer = bufp->buffer; \ - if (bufp->allocated == MAX_BUF_SIZE) \ - return REG_ESIZE; \ - bufp->allocated <<= 1; \ - if (bufp->allocated > MAX_BUF_SIZE) \ - bufp->allocated = MAX_BUF_SIZE; \ - bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ - if (bufp->buffer == NULL) \ - return REG_ESPACE; \ - /* If the buffer moved, move all the pointers into it. */ \ - if (old_buffer != bufp->buffer) \ - { \ - b = (b - old_buffer) + bufp->buffer; \ - begalt = (begalt - old_buffer) + bufp->buffer; \ - if (fixup_alt_jump) \ - fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ - if (laststart) \ - laststart = (laststart - old_buffer) + bufp->buffer; \ - if (pending_exact) \ - pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ - } \ - } while (0) - - -/* Since we have one byte reserved for the register number argument to - {start,stop}_memory, the maximum number of groups we can report - things about is what fits in that byte. */ -#define MAX_REGNUM 255 - -/* But patterns can have more than `MAX_REGNUM' registers. We just - ignore the excess. */ -typedef unsigned regnum_t; - - -/* Macros for the compile stack. */ - -/* Since offsets can go either forwards or backwards, this type needs to - be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. 
*/ -typedef int pattern_offset_t; - -typedef struct -{ - pattern_offset_t begalt_offset; - pattern_offset_t fixup_alt_jump; - pattern_offset_t inner_group_offset; - pattern_offset_t laststart_offset; - regnum_t regnum; -} compile_stack_elt_t; - - -typedef struct -{ - compile_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} compile_stack_type; - - -#define INIT_COMPILE_STACK_SIZE 32 - -#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) -#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) - -/* The next available element. */ -#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) - - -/* Set the bit for character C in a list. */ -#define SET_LIST_BIT(c) \ - (b[((unsigned char) (c)) / BYTEWIDTH] \ - |= 1 << (((unsigned char) c) % BYTEWIDTH)) - - -/* Get the next unsigned number in the uncompiled pattern. */ -#define GET_UNSIGNED_NUMBER(num) \ - { if (p != pend) \ - { \ - PATFETCH (c); \ - while (ISDIGIT (c)) \ - { \ - if (num < 0) \ - num = 0; \ - num = num * 10 + c - '0'; \ - if (p == pend) \ - break; \ - PATFETCH (c); \ - } \ - } \ - } - -#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ - -#define IS_CHAR_CLASS(string) \ - (STREQ (string, "alpha") || STREQ (string, "upper") \ - || STREQ (string, "lower") || STREQ (string, "digit") \ - || STREQ (string, "alnum") || STREQ (string, "xdigit") \ - || STREQ (string, "space") || STREQ (string, "print") \ - || STREQ (string, "punct") || STREQ (string, "graph") \ - || STREQ (string, "cntrl") || STREQ (string, "blank")) - -/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. - Returns one of error codes defined in `regex.h', or zero for success. - - Assumes the `allocated' (and perhaps `buffer') and `translate' - fields are set in BUFP on entry. 
- - If it succeeds, results are put in BUFP (if it returns an error, the - contents of BUFP are undefined): - `buffer' is the compiled pattern; - `syntax' is set to SYNTAX; - `used' is set to the length of the compiled pattern; - `fastmap_accurate' is zero; - `re_nsub' is the number of subexpressions in PATTERN; - `not_bol' and `not_eol' are zero; - - The `fastmap' and `newline_anchor' fields are neither - examined nor set. */ - -static reg_errcode_t -regex_compile (pattern, size, syntax, bufp) - const char *pattern; - int size; - reg_syntax_t syntax; - struct re_pattern_buffer *bufp; -{ - /* We fetch characters from PATTERN here. Even though PATTERN is - `char *' (i.e., signed), we declare these variables as unsigned, so - they can be reliably used as array indices. */ - register unsigned char c, c1; - - /* A random temporary spot in PATTERN. */ - const char *p1; - - /* Points to the end of the buffer, where we should append. */ - register unsigned char *b; - - /* Keeps track of unclosed groups. */ - compile_stack_type compile_stack; - - /* Points to the current (ending) position in the pattern. */ - const char *p = pattern; - const char *pend = pattern + size; - - /* How to translate the characters in the pattern. */ - char *translate = bufp->translate; - - /* Address of the count-byte of the most recently inserted `exactn' - command. This makes it possible to tell if a new exact-match - character can be added to that command or if the character requires - a new `exactn' command. */ - unsigned char *pending_exact = 0; - - /* Address of start of the most recently finished expression. - This tells, e.g., postfix * where to find the start of its - operand. Reset at the beginning of groups and alternatives. */ - unsigned char *laststart = 0; - - /* Address of beginning of regexp, or inside of last group. */ - unsigned char *begalt; - - /* Place in the uncompiled pattern (i.e., the {) to - which to go back if the interval is invalid. 
*/ - const char *beg_interval; - - /* Address of the place where a forward jump should go to the end of - the containing expression. Each alternative of an `or' -- except the - last -- ends with a forward jump of this sort. */ - unsigned char *fixup_alt_jump = 0; - - /* Counts open-groups as they are encountered. Remembered for the - matching close-group on the compile stack, so the same register - number is put in the stop_memory as the start_memory. */ - regnum_t regnum = 0; - -#ifdef DEBUG - DEBUG_PRINT1 ("\nCompiling pattern: "); - if (debug) - { - unsigned debug_count; - - for (debug_count = 0; debug_count < size; debug_count++) - printchar (pattern[debug_count]); - putchar ('\n'); - } -#endif /* DEBUG */ - - /* Initialize the compile stack. */ - compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); - if (compile_stack.stack == NULL) - return REG_ESPACE; - - compile_stack.size = INIT_COMPILE_STACK_SIZE; - compile_stack.avail = 0; - - /* Initialize the pattern buffer. */ - bufp->syntax = syntax; - bufp->fastmap_accurate = 0; - bufp->not_bol = bufp->not_eol = 0; - - /* Set `used' to zero, so that if we return an error, the pattern - printer (for debugging) will think there's no pattern. We reset it - at the end. */ - bufp->used = 0; - - /* Always count groups, whether or not bufp->no_sub is set. */ - bufp->re_nsub = 0; - -#if !defined (emacs) && !defined (SYNTAX_TABLE) - /* Initialize the syntax table. */ - init_syntax_once (); -#endif - - if (bufp->allocated == 0) - { - if (bufp->buffer) - { /* If zero allocated, but buffer is non-null, try to realloc - enough space. This loses if buffer's address is bogus, but - that is the user's responsibility. */ - RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); - } - else - { /* Caller did not allocate a buffer. Do it for them. 
*/ - bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); - } - if (!bufp->buffer) return REG_ESPACE; - - bufp->allocated = INIT_BUF_SIZE; - } - - begalt = b = bufp->buffer; - - /* Loop through the uncompiled pattern until we're at the end. */ - while (p != pend) - { - PATFETCH (c); - - switch (c) - { - case '^': - { - if ( /* If at start of pattern, it's an operator. */ - p == pattern + 1 - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's come before. */ - || at_begline_loc_p (pattern, p, syntax)) - BUF_PUSH (begline); - else - goto normal_char; - } - break; - - - case '$': - { - if ( /* If at end of pattern, it's an operator. */ - p == pend - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's next. */ - || at_endline_loc_p (p, pend, syntax)) - BUF_PUSH (endline); - else - goto normal_char; - } - break; - - - case '+': - case '?': - if ((syntax & RE_BK_PLUS_QM) - || (syntax & RE_LIMITED_OPS)) - goto normal_char; - handle_plus: - case '*': - /* If there is no previous pattern... */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; - else if (!(syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - } - - { - /* Are we optimizing this jump? */ - boolean keep_string_p = false; - - /* 1 means zero (many) matches is allowed. */ - char zero_times_ok = 0, many_times_ok = 0; - - /* If there is a sequence of repetition chars, collapse it - down to just one (the right one). We can't combine - interval operators with these because of, e.g., `a{2}*', - which should only match an even number of `a's. 
*/ - - for (;;) - { - zero_times_ok |= c != '+'; - many_times_ok |= c != '?'; - - if (p == pend) - break; - - PATFETCH (c); - - if (c == '*' - || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) - ; - - else if (syntax & RE_BK_PLUS_QM && c == '\\') - { - if (p == pend) return REG_EESCAPE; - - PATFETCH (c1); - if (!(c1 == '+' || c1 == '?')) - { - PATUNFETCH; - PATUNFETCH; - break; - } - - c = c1; - } - else - { - PATUNFETCH; - break; - } - - /* If we get here, we found another repeat character. */ - } - - /* Star, etc. applied to an empty pattern is equivalent - to an empty pattern. */ - if (!laststart) - break; - - /* Now we know whether or not zero matches is allowed - and also whether or not two or more matches is allowed. */ - if (many_times_ok) - { /* More than one repetition is allowed, so put in at the - end a backward relative jump from `b' to before the next - jump we're going to put in below (which jumps from - laststart to after this jump). - - But if we are at the `*' in the exact sequence `.*\n', - insert an unconditional jump backwards to the ., - instead of the beginning of the loop. This way we only - push a failure point once, instead of every time - through the loop. */ - assert (p - 1 > pattern); - - /* Allocate the space for the jump. */ - GET_BUFFER_SPACE (3); - - /* We know we are not at the first character of the pattern, - because laststart was nonzero. And we've already - incremented `p', by the way, to be the character after - the `*'. Do we have to do something analogous here - for null bytes, because of RE_DOT_NOT_NULL? */ - if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') - && zero_times_ok - && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') - && !(syntax & RE_DOT_NEWLINE)) - { /* We have .*\n. */ - STORE_JUMP (jump, b, laststart); - keep_string_p = true; - } - else - /* Anything else. */ - STORE_JUMP (maybe_pop_jump, b, laststart - 3); - - /* We've added more stuff to the buffer. 
*/ - b += 3; - } - - /* On failure, jump from laststart to b + 3, which will be the - end of the buffer after this jump is inserted. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump - : on_failure_jump, - laststart, b + 3); - pending_exact = 0; - b += 3; - - if (!zero_times_ok) - { - /* At least one repetition is required, so insert a - `dummy_failure_jump' before the initial - `on_failure_jump' instruction of the loop. This - effects a skip over that instruction the first time - we hit that loop. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); - b += 3; - } - } - break; - - - case '.': - laststart = b; - BUF_PUSH (anychar); - break; - - - case '[': - { - boolean had_char_class = false; - - if (p == pend) return REG_EBRACK; - - /* Ensure that we have enough space to push a charset: the - opcode, the length count, and the bitset; 34 bytes in all. */ - GET_BUFFER_SPACE (34); - - laststart = b; - - /* We test `*p == '^' twice, instead of using an if - statement, so we only need one BUF_PUSH. */ - BUF_PUSH (*p == '^' ? charset_not : charset); - if (*p == '^') - p++; - - /* Remember the first position in the bracket expression. */ - p1 = p; - - /* Push the number of bytes in the bitmap. */ - BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); - - /* Clear the whole map. */ - bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); - - /* charset_not matches newline according to a syntax bit. */ - if ((re_opcode_t) b[-2] == charset_not - && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) - SET_LIST_BIT ('\n'); - - /* Read in characters and ranges, setting map bits. */ - for (;;) - { - if (p == pend) return REG_EBRACK; - - PATFETCH (c); - - /* \ might escape characters inside [...] and [^...]. */ - if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') - { - if (p == pend) return REG_EESCAPE; - - PATFETCH (c1); - SET_LIST_BIT (c1); - continue; - } - - /* Could be the end of the bracket expression. 
If it's - not (i.e., when the bracket expression is `[]' so - far), the ']' character bit gets set way below. */ - if (c == ']' && p != p1 + 1) - break; - - /* Look ahead to see if it's a range when the last thing - was a character class. */ - if (had_char_class && c == '-' && *p != ']') - return REG_ERANGE; - - /* Look ahead to see if it's a range when the last thing - was a character: if this is a hyphen not at the - beginning or the end of a list, then it's the range - operator. */ - if (c == '-' - && !(p - 2 >= pattern && p[-2] == '[') - && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') - && *p != ']') - { - reg_errcode_t ret - = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; - } - - else if (p[0] == '-' && p[1] != ']') - { /* This handles ranges made up of characters only. */ - reg_errcode_t ret; - - /* Move past the `-'. */ - PATFETCH (c1); - - ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; - } - - /* See if we're at the beginning of a possible character - class. */ - - else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') - { /* Leave room for the null. */ - char str[CHAR_CLASS_MAX_LENGTH + 1]; - - PATFETCH (c); - c1 = 0; - - /* If pattern is `[[:'. */ - if (p == pend) return REG_EBRACK; - - for (;;) - { - PATFETCH (c); - if (c == ':' || c == ']' || p == pend - || c1 == CHAR_CLASS_MAX_LENGTH) - break; - str[c1++] = c; - } - str[c1] = '\0'; - - /* If isn't a word bracketed by `[:' and:`]': - undo the ending character, the letters, and leave - the leading `:' and `[' (but set bits for them). 
*/ - if (c == ':' && *p == ']') - { - int ch; - boolean is_alnum = STREQ (str, "alnum"); - boolean is_alpha = STREQ (str, "alpha"); - boolean is_blank = STREQ (str, "blank"); - boolean is_cntrl = STREQ (str, "cntrl"); - boolean is_digit = STREQ (str, "digit"); - boolean is_graph = STREQ (str, "graph"); - boolean is_lower = STREQ (str, "lower"); - boolean is_print = STREQ (str, "print"); - boolean is_punct = STREQ (str, "punct"); - boolean is_space = STREQ (str, "space"); - boolean is_upper = STREQ (str, "upper"); - boolean is_xdigit = STREQ (str, "xdigit"); - - if (!IS_CHAR_CLASS (str)) return REG_ECTYPE; - - /* Throw away the ] at the end of the character - class. */ - PATFETCH (c); - - if (p == pend) return REG_EBRACK; - - for (ch = 0; ch < 1 << BYTEWIDTH; ch++) - { - if ( (is_alnum && ISALNUM (ch)) - || (is_alpha && ISALPHA (ch)) - || (is_blank && ISBLANK (ch)) - || (is_cntrl && ISCNTRL (ch)) - || (is_digit && ISDIGIT (ch)) - || (is_graph && ISGRAPH (ch)) - || (is_lower && ISLOWER (ch)) - || (is_print && ISPRINT (ch)) - || (is_punct && ISPUNCT (ch)) - || (is_space && ISSPACE (ch)) - || (is_upper && ISUPPER (ch)) - || (is_xdigit && ISXDIGIT (ch))) - SET_LIST_BIT (ch); - } - had_char_class = true; - } - else - { - c1++; - while (c1--) - PATUNFETCH; - SET_LIST_BIT ('['); - SET_LIST_BIT (':'); - had_char_class = false; - } - } - else - { - had_char_class = false; - SET_LIST_BIT (c); - } - } - - /* Discard any (non)matching list bytes that are all 0 at the - end of the map. Decrease the map-length byte too. 
*/ - while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) - b[-1]--; - b += b[-1]; - } - break; - - - case '(': - if (syntax & RE_NO_BK_PARENS) - goto handle_open; - else - goto normal_char; - - - case ')': - if (syntax & RE_NO_BK_PARENS) - goto handle_close; - else - goto normal_char; - - - case '\n': - if (syntax & RE_NEWLINE_ALT) - goto handle_alt; - else - goto normal_char; - - - case '|': - if (syntax & RE_NO_BK_VBAR) - goto handle_alt; - else - goto normal_char; - - - case '{': - if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) - goto handle_interval; - else - goto normal_char; - - - case '\\': - if (p == pend) return REG_EESCAPE; - - /* Do not translate the character after the \, so that we can - distinguish, e.g., \B from \b, even if we normally would - translate, e.g., B to b. */ - PATFETCH_RAW (c); - - switch (c) - { - case '(': - if (syntax & RE_NO_BK_PARENS) - goto normal_backslash; - - handle_open: - bufp->re_nsub++; - regnum++; - - if (COMPILE_STACK_FULL) - { - RETALLOC (compile_stack.stack, compile_stack.size << 1, - compile_stack_elt_t); - if (compile_stack.stack == NULL) return REG_ESPACE; - - compile_stack.size <<= 1; - } - - /* These are the values to restore when we hit end of this - group. They are all relative offsets, so that if the - whole pattern moves because of realloc, they will still - be valid. */ - COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; - COMPILE_STACK_TOP.fixup_alt_jump - = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; - COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; - COMPILE_STACK_TOP.regnum = regnum; - - /* We will eventually replace the 0 with the number of - groups inner to this one. But do not push a - start_memory for groups beyond the last one we can - represent in the compiled pattern. 
*/ - if (regnum <= MAX_REGNUM) - { - COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; - BUF_PUSH_3 (start_memory, regnum, 0); - } - - compile_stack.avail++; - - fixup_alt_jump = 0; - laststart = 0; - begalt = b; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. */ - pending_exact = 0; - break; - - - case ')': - if (syntax & RE_NO_BK_PARENS) goto normal_backslash; - - if (COMPILE_STACK_EMPTY) - { - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_backslash; - else - return REG_ERPAREN; - } - - handle_close: - if (fixup_alt_jump) - { /* Push a dummy failure point at the end of the - alternative for a possible future - `pop_failure_jump' to pop. See comments at - `push_dummy_failure' in `re_match_2'. */ - BUF_PUSH (push_dummy_failure); - - /* We allocated space for this jump when we assigned - to `fixup_alt_jump', in the `handle_alt' case below. */ - STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); - } - - /* See similar code for backslashed left paren above. */ - if (COMPILE_STACK_EMPTY) - { - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - else - return REG_ERPAREN; - } - - /* Since we just checked for an empty stack above, this - ``can't happen''. */ - assert (compile_stack.avail != 0); - { - /* We don't just want to restore into `regnum', because - later groups should continue to be numbered higher, - as in `(ab)c(de)' -- the second group is #2. */ - regnum_t this_group_regnum; - - compile_stack.avail--; - begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; - fixup_alt_jump - = COMPILE_STACK_TOP.fixup_alt_jump - ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 - : 0; - laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; - this_group_regnum = COMPILE_STACK_TOP.regnum; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. 
*/ - pending_exact = 0; - - /* We're at the end of the group, so now we know how many - groups were inside this one. */ - if (this_group_regnum <= MAX_REGNUM) - { - unsigned char *inner_group_loc - = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; - - *inner_group_loc = regnum - this_group_regnum; - BUF_PUSH_3 (stop_memory, this_group_regnum, - regnum - this_group_regnum); - } - } - break; - - - case '|': /* `\|'. */ - if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) - goto normal_backslash; - handle_alt: - if (syntax & RE_LIMITED_OPS) - goto normal_char; - - /* Insert before the previous alternative a jump which - jumps to this alternative if the former fails. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (on_failure_jump, begalt, b + 6); - pending_exact = 0; - b += 3; - - /* The alternative before this one has a jump after it - which gets executed if it gets matched. Adjust that - jump so it will jump to this alternative's analogous - jump (put in below, which in turn will jump to the next - (if any) alternative's such jump, etc.). The last such - jump jumps to the correct final destination. A picture: - _____ _____ - | | | | - | v | v - a | b | c - - If we are at `b', then fixup_alt_jump right now points to a - three-byte space after `a'. We'll put in the jump, set - fixup_alt_jump to right after `b', and leave behind three - bytes which we'll fill in when we get to after `c'. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - /* Mark and leave space for a jump after this alternative, - to be filled in later either by next alternative or - when know we're at the end of a series of alternatives. */ - fixup_alt_jump = b; - GET_BUFFER_SPACE (3); - b += 3; - - laststart = 0; - begalt = b; - break; - - - case '{': - /* If \{ is a literal. */ - if (!(syntax & RE_INTERVALS) - /* If we're at `\{' and it's not the open-interval - operator. 
*/ - || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - || (p - 2 == pattern && p == pend)) - goto normal_backslash; - - handle_interval: - { - /* If got here, then the syntax allows intervals. */ - - /* At least (most) this many matches must be made. */ - int lower_bound = -1, upper_bound = -1; - - beg_interval = p - 1; - - if (p == pend) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - return REG_EBRACE; - } - - GET_UNSIGNED_NUMBER (lower_bound); - - if (c == ',') - { - GET_UNSIGNED_NUMBER (upper_bound); - if (upper_bound < 0) upper_bound = RE_DUP_MAX; - } - else - /* Interval such as `{1}' => match exactly once. */ - upper_bound = lower_bound; - - if (lower_bound < 0 || upper_bound > RE_DUP_MAX - || lower_bound > upper_bound) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - return REG_BADBR; - } - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (c != '\\') return REG_EBRACE; - - PATFETCH (c); - } - - if (c != '}') - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - return REG_BADBR; - } - - /* We just parsed a valid interval. */ - - /* If it's invalid to have no preceding re. */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; - else if (syntax & RE_CONTEXT_INDEP_OPS) - laststart = b; - else - goto unfetch_interval; - } - - /* If the upper bound is zero, don't want to succeed at - all; jump from `laststart' to `b + 3', which will be - the end of the buffer after we insert the jump. */ - if (upper_bound == 0) - { - GET_BUFFER_SPACE (3); - INSERT_JUMP (jump, laststart, b + 3); - b += 3; - } - - /* Otherwise, we have a nontrivial interval. When - we're all done, the pattern will look like: - set_number_at <jump count> <upper bound> - set_number_at <succeed_n count> <lower bound> - succeed_n <after jump addr> <succeed_n count> - <body of loop> - jump_n <succeed_n addr> <jump count> - (The upper bound and `jump_n' are omitted if - `upper_bound' is 1, though.) 
*/ - else - { /* If the upper bound is > 1, we need to insert - more at the end of the loop. */ - unsigned nbytes = 10 + (upper_bound > 1) * 10; - - GET_BUFFER_SPACE (nbytes); - - /* Initialize lower bound of the `succeed_n', even - though it will be set during matching by its - attendant `set_number_at' (inserted next), - because `re_compile_fastmap' needs to know. - Jump to the `jump_n' we might insert below. */ - INSERT_JUMP2 (succeed_n, laststart, - b + 5 + (upper_bound > 1) * 5, - lower_bound); - b += 5; - - /* Code to initialize the lower bound. Insert - before the `succeed_n'. The `5' is the last two - bytes of this `set_number_at', plus 3 bytes of - the following `succeed_n'. */ - insert_op2 (set_number_at, laststart, 5, lower_bound, b); - b += 5; - - if (upper_bound > 1) - { /* More than one repetition is allowed, so - append a backward jump to the `succeed_n' - that starts this interval. - - When we've reached this during matching, - we'll have matched the interval once, so - jump back only `upper_bound - 1' times. */ - STORE_JUMP2 (jump_n, b, laststart + 5, - upper_bound - 1); - b += 5; - - /* The location we want to set is the second - parameter of the `jump_n'; that is `b-2' as - an absolute address. `laststart' will be - the `set_number_at' we're about to insert; - `laststart+3' the number to set, the source - for the relative address. But we are - inserting into the middle of the pattern -- - so everything is getting moved up by 5. - Conclusion: (b - 2) - (laststart + 3) + 5, - i.e., b - laststart. - - We insert this at the beginning of the loop - so that if we fail during matching, we'll - reinitialize the bounds. */ - insert_op2 (set_number_at, laststart, b - laststart, - upper_bound - 1, b); - b += 5; - } - } - pending_exact = 0; - beg_interval = NULL; - } - break; - - unfetch_interval: - /* If an invalid interval, match the characters as literals. 
*/ - assert (beg_interval); - p = beg_interval; - beg_interval = NULL; - - /* normal_char and normal_backslash need `c'. */ - PATFETCH (c); - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (p > pattern && p[-1] == '\\') - goto normal_backslash; - } - goto normal_char; - -#ifdef emacs - /* There is no way to specify the before_dot and after_dot - operators. rms says this is ok. --karl */ - case '=': - BUF_PUSH (at_dot); - break; - - case 's': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); - break; - - case 'S': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); - break; -#endif /* emacs */ - - - case 'w': - laststart = b; - BUF_PUSH (wordchar); - break; - - - case 'W': - laststart = b; - BUF_PUSH (notwordchar); - break; - - - case '<': - BUF_PUSH (wordbeg); - break; - - case '>': - BUF_PUSH (wordend); - break; - - case 'b': - BUF_PUSH (wordbound); - break; - - case 'B': - BUF_PUSH (notwordbound); - break; - - case '`': - BUF_PUSH (begbuf); - break; - - case '\'': - BUF_PUSH (endbuf); - break; - - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - if (syntax & RE_NO_BK_REFS) - goto normal_char; - - c1 = c - '0'; - - if (c1 > regnum) - return REG_ESUBREG; - - /* Can't back reference to a subexpression if inside of it. */ - if (group_in_compile_stack (compile_stack, c1)) - goto normal_char; - - laststart = b; - BUF_PUSH_2 (duplicate, c1); - break; - - - case '+': - case '?': - if (syntax & RE_BK_PLUS_QM) - goto handle_plus; - else - goto normal_backslash; - - default: - normal_backslash: - /* You might think it would be useful for \ to mean - not to translate; but if we don't translate it - it will never match anything. */ - c = TRANSLATE (c); - goto normal_char; - } - break; - - - default: - /* Expects the character in `c'. */ - normal_char: - /* If no exactn currently being built. */ - if (!pending_exact - - /* If last exactn not at current position. 
*/ - || pending_exact + *pending_exact + 1 != b - - /* We have only one byte following the exactn for the count. */ - || *pending_exact == (1 << BYTEWIDTH) - 1 - - /* If followed by a repetition operator. */ - || *p == '*' || *p == '^' - || ((syntax & RE_BK_PLUS_QM) - ? *p == '\\' && (p[1] == '+' || p[1] == '?') - : (*p == '+' || *p == '?')) - || ((syntax & RE_INTERVALS) - && ((syntax & RE_NO_BK_BRACES) - ? *p == '{' - : (p[0] == '\\' && p[1] == '{')))) - { - /* Start building a new exactn. */ - - laststart = b; - - BUF_PUSH_2 (exactn, 0); - pending_exact = b - 1; - } - - BUF_PUSH (c); - (*pending_exact)++; - break; - } /* switch (c) */ - } /* while p != pend */ - - - /* Through the pattern now. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - if (!COMPILE_STACK_EMPTY) - return REG_EPAREN; - - free (compile_stack.stack); - - /* We have succeeded; set the length of the buffer. */ - bufp->used = b - bufp->buffer; - -#ifdef DEBUG - if (debug) - { - DEBUG_PRINT1 ("\nCompiled pattern: "); - print_compiled_pattern (bufp); - } -#endif /* DEBUG */ - - return REG_NOERROR; -} /* regex_compile */ - -/* Subroutines for `regex_compile'. */ - -/* Store OP at LOC followed by two-byte integer parameter ARG. */ - -static void -store_op1 (op, loc, arg) - re_opcode_t op; - unsigned char *loc; - int arg; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg); -} - - -/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -store_op2 (op, loc, arg1, arg2) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg1); - STORE_NUMBER (loc + 3, arg2); -} - - -/* Copy the bytes from LOC to END to open up three bytes of space at LOC - for OP followed by two-byte integer parameter ARG. 
*/ - -static void -insert_op1 (op, loc, arg, end) - re_opcode_t op; - unsigned char *loc; - int arg; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 3; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op1 (op, loc, arg); -} - - -/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -insert_op2 (op, loc, arg1, arg2, end) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 5; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op2 (op, loc, arg1, arg2); -} - - -/* P points to just after a ^ in PATTERN. Return true if that ^ comes - after an alternative or a begin-subexpression. We assume there is at - least one character before the ^. */ - -static boolean -at_begline_loc_p (pattern, p, syntax) - const char *pattern, *p; - reg_syntax_t syntax; -{ - const char *prev = p - 2; - boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; - - return - /* After a subexpression? */ - (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) - /* After an alternative? */ - || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); -} - - -/* The dual of at_begline_loc_p. This one is for $. We assume there is - at least one character after the $, i.e., `P < PEND'. */ - -static boolean -at_endline_loc_p (p, pend, syntax) - const char *p, *pend; - int syntax; -{ - const char *next = p; - boolean next_backslash = *next == '\\'; - const char *next_next = p + 1 < pend ? p + 1 : NULL; - - return - /* Before a subexpression? */ - (syntax & RE_NO_BK_PARENS ? *next == ')' - : next_backslash && next_next && *next_next == ')') - /* Before an alternative? */ - || (syntax & RE_NO_BK_VBAR ? *next == '|' - : next_backslash && next_next && *next_next == '|'); -} - - -/* Returns true if REGNUM is in one of COMPILE_STACK's elements and - false if it's not. 
*/ - -static boolean -group_in_compile_stack (compile_stack, regnum) - compile_stack_type compile_stack; - regnum_t regnum; -{ - int this_element; - - for (this_element = compile_stack.avail - 1; - this_element >= 0; - this_element--) - if (compile_stack.stack[this_element].regnum == regnum) - return true; - - return false; -} - - -/* Read the ending character of a range (in a bracket expression) from the - uncompiled pattern *P_PTR (which ends at PEND). We assume the - starting character is in `P[-2]'. (`P[-1]' is the character `-'.) - Then we set the translation of all bits between the starting and - ending characters (inclusive) in the compiled pattern B. - - Return an error code. - - We use these short variable names so we can use the same macros as - `regex_compile' itself. */ - -static reg_errcode_t -compile_range (p_ptr, pend, translate, syntax, b) - const char **p_ptr, *pend; - char *translate; - reg_syntax_t syntax; - unsigned char *b; -{ - unsigned this_char; - - const char *p = *p_ptr; - int range_start, range_end; - - if (p == pend) - return REG_ERANGE; - - /* Even though the pattern is a signed `char *', we need to fetch - with unsigned char *'s; if the high bit of the pattern character - is set, the range endpoints will be negative if we fetch using a - signed char *. - - We also want to fetch the endpoints without translating them; the - appropriate translation is done in the bit-setting loop below. */ - range_start = ((unsigned char *) p)[-2]; - range_end = ((unsigned char *) p)[0]; - - /* Have to increment the pointer into the pattern string, so the - caller isn't still at the ending character. */ - (*p_ptr)++; - - /* If the start is after the end, the range is empty. */ - if (range_start > range_end) - return syntax & RE_NO_EMPTY_RANGES ? 
REG_ERANGE : REG_NOERROR; - - /* Here we see why `this_char' has to be larger than an `unsigned - char' -- the range is inclusive, so if `range_end' == 0xff - (assuming 8-bit characters), we would otherwise go into an infinite - loop, since all characters <= 0xff. */ - for (this_char = range_start; this_char <= range_end; this_char++) - { - SET_LIST_BIT (TRANSLATE (this_char)); - } - - return REG_NOERROR; -} - -/* Failure stack declarations and macros; both re_compile_fastmap and - re_match_2 use a failure stack. These have to be macros because of - REGEX_ALLOCATE. */ - - -/* Number of failure points for which to initially allocate space - when matching. If this number is exceeded, we allocate more - space, so it is not a hard limit. */ -#ifndef INIT_FAILURE_ALLOC -#define INIT_FAILURE_ALLOC 5 -#endif - -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_SPACE each time we failed. - This is a variable only so users of regex can assign to it; we never - change it ourselves. */ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* Make sure noone compiles this code with a C++ compiler. */ +#ifdef __cplusplus +# error "This is C code, use a C compiler" +#endif + +#ifdef _LIBC +/* We have to keep the namespace clean. 
*/ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# include "../locale/localeinfo.h" +#endif + +#if defined (_MSC_VER) +#include <stdio.h> /* for size_t */ +#endif + +/* On some systems, limits.h sets RE_DUP_MAX to a lower value than + GNU regex allows. Include it before <regex.h>, which correctly + #undefs RE_DUP_MAX and sets it to the right value. */ +#include <limits.h> + +#ifdef GAWK +#undef alloca +#define alloca alloca_is_bad_you_should_never_use_it +#endif +#include <regex.h> +#include "regex_internal.h" + +#include "regex_internal.c" +#ifdef GAWK +#define bool int +#define true (1) +#define false (0) +#endif +#include "regcomp.c" +#include "regexec.c" + +/* Binary backward compatibility. 
*/ +#if _LIBC +# include <shlib-compat.h> +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) +link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") int re_max_failures = 2000; - -typedef const unsigned char *fail_stack_elt_t; - -typedef struct -{ - fail_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} fail_stack_type; - -#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) -#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) -#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) -#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) - - -/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */ - -#define INIT_FAIL_STACK() \ - do { \ - fail_stack.stack = (fail_stack_elt_t *) \ - REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ - \ - if (fail_stack.stack == NULL) \ - return -2; \ - \ - fail_stack.size = INIT_FAILURE_ALLOC; \ - fail_stack.avail = 0; \ - } while (0) - - -/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. - - Return 1 if succeeds, and 0 if either ran out of memory - allocating space for it or it was already too large. - - REGEX_REALLOCATE requires `destination' be declared. */ - -#define DOUBLE_FAIL_STACK(fail_stack) \ - ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ - ? 0 \ - : ((fail_stack).stack = (fail_stack_elt_t *) \ - REGEX_REALLOCATE ((fail_stack).stack, \ - (fail_stack).size * sizeof (fail_stack_elt_t), \ - ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ - \ - (fail_stack).stack == NULL \ - ? 0 \ - : ((fail_stack).size <<= 1, \ - 1))) - - -/* Push PATTERN_OP on FAIL_STACK. - - Return 1 if was able to do so and 0 if ran out of memory allocating - space to do so. */ -#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ - ((FAIL_STACK_FULL () \ - && !DOUBLE_FAIL_STACK (fail_stack)) \ - ? 
0 \ - : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ - 1)) - -/* This pushes an item onto the failure stack. Must be a four-byte - value. Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_ITEM(item) \ - fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item - -/* The complement operation. Assumes `fail_stack' is nonempty. */ -#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail] - -/* Used to omit pushing failure point id's when we're not debugging. */ -#ifdef DEBUG -#define DEBUG_PUSH PUSH_FAILURE_ITEM -#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM () -#else -#define DEBUG_PUSH(item) -#define DEBUG_POP(item_addr) -#endif - - -/* Push the information about the state we will need - if we ever fail back to it. - - Requires variables fail_stack, regstart, regend, reg_info, and - num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be - declared. - - Does `return FAILURE_CODE' if runs out of memory. */ - -#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ - do { \ - char *destination; \ - /* Must be int, so when we don't save any registers, the arithmetic \ - of 0 + -1 isn't done as unsigned. */ \ - int this_reg; \ - \ - DEBUG_STATEMENT (failure_id++); \ - DEBUG_STATEMENT (nfailure_points_pushed++); \ - DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ - DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ - DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ - \ - DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ - DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ - \ - /* Ensure we have enough space allocated for what we will push. 
*/ \ - while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ - { \ - if (!DOUBLE_FAIL_STACK (fail_stack)) \ - return failure_code; \ - \ - DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ - (fail_stack).size); \ - DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ - } \ - \ - /* Push the info, starting with the registers. */ \ - DEBUG_PRINT1 ("\n"); \ - \ - for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ - this_reg++) \ - { \ - DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ - DEBUG_STATEMENT (num_regs_pushed++); \ - \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - PUSH_FAILURE_ITEM (regstart[this_reg]); \ - \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - PUSH_FAILURE_ITEM (regend[this_reg]); \ - \ - DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ - DEBUG_PRINT2 (" match_null=%d", \ - REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ - DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ - DEBUG_PRINT2 (" matched_something=%d", \ - MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT2 (" ever_matched=%d", \ - EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT1 ("\n"); \ - PUSH_FAILURE_ITEM (reg_info[this_reg].word); \ - } \ - \ - DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ - PUSH_FAILURE_ITEM (lowest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ - PUSH_FAILURE_ITEM (highest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ - PUSH_FAILURE_ITEM (pattern_place); \ - \ - DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ - DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ - size2); \ - DEBUG_PRINT1 ("'\n"); \ - PUSH_FAILURE_ITEM (string_place); \ - \ - DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ - DEBUG_PUSH (failure_id); \ - } while (0) - -/* This is the number of items that are 
pushed and popped on the stack - for each register. */ -#define NUM_REG_ITEMS 3 - -/* Individual items aside from the registers. */ -#ifdef DEBUG -#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ -#else -#define NUM_NONREG_ITEMS 4 -#endif - -/* We push at most this many items on the stack. */ -#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS) - -/* We actually push this many items. */ -#define NUM_FAILURE_ITEMS \ - ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ - + NUM_NONREG_ITEMS) - -/* How many items can still be added to the stack without overflowing it. */ -#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) - - -/* Pops what PUSH_FAIL_STACK pushes. - - We restore into the parameters, all of which should be lvalues: - STR -- the saved data position. - PAT -- the saved pattern position. - LOW_REG, HIGH_REG -- the highest and lowest active registers. - REGSTART, REGEND -- arrays of string positions. - REG_INFO -- array of information about each subexpression. - - Also assumes the variables `fail_stack' and (if debugging), `bufp', - `pend', `string1', `size1', `string2', and `size2'. */ - -#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ -{ \ - DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ - int this_reg; \ - const unsigned char *string_temp; \ - \ - assert (!FAIL_STACK_EMPTY ()); \ - \ - /* Remove failure points and point to how many regs pushed. 
*/ \ - DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ - DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ - \ - assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ - \ - DEBUG_POP (&failure_id); \ - DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ - \ - /* If the saved string location is NULL, it came from an \ - on_failure_keep_string_jump opcode, and we want to throw away the \ - saved NULL, thus retaining our current position in the string. */ \ - string_temp = POP_FAILURE_ITEM (); \ - if (string_temp != NULL) \ - str = (const char *) string_temp; \ - \ - DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ - DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ - DEBUG_PRINT1 ("'\n"); \ - \ - pat = (unsigned char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ - \ - /* Restore register info. */ \ - high_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ - \ - low_reg = (unsigned) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ - \ - for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ - { \ - DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ - \ - reg_info[this_reg].word = POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ - \ - regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - \ - regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - } \ - \ - DEBUG_STATEMENT (nfailure_points_popped++); \ -} /* POP_FAILURE_POINT */ - -/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in - BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible - characters can start a string that matches the pattern. 
This fastmap - is used by re_search to skip quickly over impossible starting points. - - The caller must supply the address of a (1 << BYTEWIDTH)-byte data - area as BUFP->fastmap. - - We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in - the pattern buffer. - - Returns 0 if we succeed, -2 if an internal error. */ - -int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; -{ - int j, k; - fail_stack_type fail_stack; -#ifndef REGEX_MALLOC - char *destination; -#endif - /* We don't push any register information onto the failure stack. */ - unsigned num_regs = 0; - - register char *fastmap = bufp->fastmap; - unsigned char *pattern = bufp->buffer; - unsigned long size = bufp->used; - const unsigned char *p = pattern; - register unsigned char *pend = pattern + size; - - /* Assume that each path through the pattern can be null until - proven otherwise. We set this false at the bottom of switch - statement, to which we get only if a particular path doesn't - match the empty string. */ - boolean path_can_be_null = true; - - /* We aren't doing a `succeed_n' to begin with. */ - boolean succeed_n_p = false; - - assert (fastmap != NULL && p != NULL); - - INIT_FAIL_STACK (); - bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ - bufp->fastmap_accurate = 1; /* It will be when we're done. */ - bufp->can_be_null = 0; - - while (p != pend || !FAIL_STACK_EMPTY ()) - { - if (p == pend) - { - bufp->can_be_null |= path_can_be_null; - - /* Reset for next path. */ - path_can_be_null = true; - - p = fail_stack.stack[--fail_stack.avail]; - } - - /* We should never be about to go beyond the end of the pattern. */ - assert (p < pend); - -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - - /* I guess the idea here is to simply not bother with a fastmap - if a backreference is used, since it's too hard to figure out - the fastmap for the corresponding group. 
Setting - `can_be_null' stops `re_search_2' from using the fastmap, so - that is all we do. */ - case duplicate: - bufp->can_be_null = 1; - return 0; - - - /* Following are the cases which match a character. These end - with `break'. */ - - case exactn: - fastmap[p[1]] = 1; - break; - - - case charset: - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) - fastmap[j] = 1; - break; - - - case charset_not: - /* Chars beyond end of map must be allowed. */ - for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) - fastmap[j] = 1; - break; - - - case wordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == Sword) - fastmap[j] = 1; - break; - - - case notwordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != Sword) - fastmap[j] = 1; - break; - - - case anychar: - /* `.' matches anything ... */ - for (j = 0; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - /* ... except perhaps newline. */ - if (!(bufp->syntax & RE_DOT_NEWLINE)) - fastmap['\n'] = 0; - - /* Return if we have already set `can_be_null'; if we have, - then the fastmap is irrelevant. Something's wrong here. */ - else if (bufp->can_be_null) - return 0; - - /* Otherwise, have to check alternative paths. */ - break; - - -#ifdef emacs - case syntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - case notsyntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - /* All cases after this match the empty string. These end with - `continue'. 
*/ - - - case before_dot: - case at_dot: - case after_dot: - continue; -#endif /* not emacs */ - - - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbound: - case notwordbound: - case wordbeg: - case wordend: - case push_dummy_failure: - continue; - - - case jump_n: - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case jump_past_alt: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - if (j > 0) - continue; - - /* Jump backward implies we just went through the body of a - loop and matched nothing. Opcode jumped to should be - `on_failure_jump' or `succeed_n'. Just treat it like an - ordinary jump. For a * loop, it has pushed its failure - point already; if so, discard that as redundant. */ - if ((re_opcode_t) *p != on_failure_jump - && (re_opcode_t) *p != succeed_n) - continue; - - p++; - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - - /* If what's on the stack is where we are now, pop it. */ - if (!FAIL_STACK_EMPTY () - && fail_stack.stack[fail_stack.avail - 1] == p) - fail_stack.avail--; - - continue; - - - case on_failure_jump: - case on_failure_keep_string_jump: - handle_on_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - - /* For some patterns, e.g., `(a?)?', `p+j' here points to the - end of the pattern. We don't want to push such a point, - since when we restore it above, entering the switch will - increment `p' past the end of the pattern. We don't need - to push such a point since we obviously won't find any more - fastmap entries beyond `pend'. Such a pattern can match - the null string, though. */ - if (p + j < pend) - { - if (!PUSH_PATTERN_OP (p + j, fail_stack)) - return -2; - } - else - bufp->can_be_null = 1; - - if (succeed_n_p) - { - EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ - succeed_n_p = false; - } - - continue; - - - case succeed_n: - /* Get to the number of times to succeed. */ - p += 2; - - /* Increment p past the n for when k != 0. 
*/ - EXTRACT_NUMBER_AND_INCR (k, p); - if (k == 0) - { - p -= 4; - succeed_n_p = true; /* Spaghetti code alert. */ - goto handle_on_failure_jump; - } - continue; - - - case set_number_at: - p += 4; - continue; - - - case start_memory: - case stop_memory: - p += 2; - continue; - - - default: - abort (); /* We have listed all the cases. */ - } /* switch *p++ */ - - /* Getting here means we have found the possible starting - characters for one path of the pattern -- and that the empty - string does not match. We need not follow this path further. - Instead, look at the next alternative (remembered on the - stack), or quit if no more. The test at the top of the loop - does these things. */ - path_can_be_null = false; - p = pend; - } /* while p */ - - /* Set `can_be_null' for the last path (also the first path, if the - pattern is empty). */ - bufp->can_be_null |= path_can_be_null; - return 0; -} /* re_compile_fastmap */ - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use - this memory for recording register information. STARTS and ENDS - must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ - -void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - unsigned num_regs; - regoff_t *starts, *ends; -{ - if (num_regs) - { - bufp->regs_allocated = REGS_REALLOCATE; - regs->num_regs = num_regs; - regs->start = starts; - regs->end = ends; - } - else - { - bufp->regs_allocated = REGS_UNALLOCATED; - regs->num_regs = 0; - regs->start = regs->end = (regoff_t *) 0; - } -} - -/* Searching routines. 
*/ - -/* Like re_search_2, below, but only one string is specified, and - doesn't let you say where to stop matching. */ - -int -re_search (bufp, string, size, startpos, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, startpos, range; - struct re_registers *regs; -{ - return re_search_2 (bufp, NULL, 0, string, size, startpos, range, - regs, size); -} - - -/* Using the compiled pattern in BUFP->buffer, first tries to match the - virtual concatenation of STRING1 and STRING2, starting first at index - STARTPOS, then at STARTPOS + 1, and so on. - - STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. - - RANGE is how far to scan while trying to match. RANGE = 0 means try - only at STARTPOS; in general, the last start tried is STARTPOS + - RANGE. - - In REGS, return the indices of the virtual concatenation of STRING1 - and STRING2 that matched the entire BUFP->buffer and its contained - subexpressions. - - Do not consider matching one past the index STOP in the virtual - concatenation of STRING1 and STRING2. - - We return either the position in the strings at which the match was - found, -1 if no match, or -2 if error (such as failure - stack overflow). */ - -int -re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int startpos; - int range; - struct re_registers *regs; - int stop; -{ - int val; - register char *fastmap = bufp->fastmap; - register char *translate = bufp->translate; - int total_size = size1 + size2; - int endpos = startpos + range; - - /* Check for out-of-range STARTPOS. */ - if (startpos < 0 || startpos > total_size) - return -1; - - /* Fix up RANGE if it might eventually take us outside - the virtual concatenation of STRING1 and STRING2. 
*/ - if (endpos < -1) - range = -1 - startpos; - else if (endpos > total_size) - range = total_size - startpos; - - /* If the search isn't to be a backwards one, don't waste time in a - search for a pattern that must be anchored. */ - if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) - { - if (startpos > 0) - return -1; - else - range = 1; - } - - /* Update the fastmap now if not correct already. */ - if (fastmap && !bufp->fastmap_accurate) - if (re_compile_fastmap (bufp) == -2) - return -2; - - /* Loop through the string, looking for a place to start matching. */ - for (;;) - { - /* If a fastmap is supplied, skip quickly over characters that - cannot be the start of a match. If the pattern can match the - null string, however, we don't need to skip characters; we want - the first null string. */ - if (fastmap && startpos < total_size && !bufp->can_be_null) - { - if (range > 0) /* Searching forwards. */ - { - register const char *d; - register int lim = 0; - int irange = range; - - if (startpos < size1 && startpos + range >= size1) - lim = range - (size1 - startpos); - - d = (startpos >= size1 ? string2 - size1 : string1) + startpos; - - /* Written out as an if-else to avoid testing `translate' - inside the loop. */ - if (translate) - while (range > lim - && !fastmap[(unsigned char) - translate[(unsigned char) *d++]]) - range--; - else - while (range > lim && !fastmap[(unsigned char) *d++]) - range--; - - startpos += irange - range; - } - else /* Searching backwards. */ - { - register char c = (size1 == 0 || startpos >= size1 - ? string2[startpos - size1] - : string1[startpos]); - - if (!fastmap[(unsigned char) TRANSLATE (c)]) - goto advance; - } - } - - /* If can't match the null string, and that's all we have left, fail. 
*/ - if (range >= 0 && startpos == total_size && fastmap - && !bufp->can_be_null) - return -1; - - val = re_match_2 (bufp, string1, size1, string2, size2, - startpos, regs, stop); - if (val >= 0) - return startpos; - - if (val == -2) - return -2; - - advance: - if (!range) - break; - else if (range > 0) - { - range--; - startpos++; - } - else - { - range++; - startpos--; - } - } - return -1; -} /* re_search_2 */ - -/* Declarations and macros for re_match_2. */ - -static int bcmp_translate (); -static boolean alt_match_null_string_p (), - common_op_match_null_string_p (), - group_match_null_string_p (); - -/* Structure for per-register (a.k.a. per-group) information. - This must not be longer than one word, because we push this value - onto the failure stack. Other register information, such as the - starting and ending positions (which are addresses), and the list of - inner groups (which is a bits list) are maintained in separate - variables. - - We are making a (strictly speaking) nonportable assumption here: that - the compiler will pack our bit fields into something that fits into - the type of `word', i.e., is something that fits into one item on the - failure stack. */ -typedef union -{ - fail_stack_elt_t word; - struct - { - /* This field is one if this group can match the empty string, - zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ -#define MATCH_NULL_UNSET_VALUE 3 - unsigned match_null_string_p : 2; - unsigned is_active : 1; - unsigned matched_something : 1; - unsigned ever_matched_something : 1; - } bits; -} register_info_type; - -#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) -#define IS_ACTIVE(R) ((R).bits.is_active) -#define MATCHED_SOMETHING(R) ((R).bits.matched_something) -#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) - - -/* Call this when have matched a real character; it sets `matched' flags - for the subexpressions which we are currently inside. 
Also records - that those subexprs have matched. */ -#define SET_REGS_MATCHED() \ - do \ - { \ - unsigned r; \ - for (r = lowest_active_reg; r <= highest_active_reg; r++) \ - { \ - MATCHED_SOMETHING (reg_info[r]) \ - = EVER_MATCHED_SOMETHING (reg_info[r]) \ - = 1; \ - } \ - } \ - while (0) - - -/* This converts PTR, a pointer into one of the search strings `string1' - and `string2' into an offset from the beginning of that string. */ -#define POINTER_TO_OFFSET(ptr) \ - (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) - -/* Registers are set to a sentinel when they haven't yet matched. */ -#define REG_UNSET_VALUE ((char *) -1) -#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) - - -/* Macros for dealing with the split strings in re_match_2. */ - -#define MATCHING_IN_FIRST_STRING (dend == end_match_1) - -/* Call before fetching a character with *d. This switches over to - string2 if necessary. */ -#define PREFETCH() \ - while (d == dend) \ - { \ - /* End of string2 => fail. */ \ - if (dend == end_match_2) \ - goto fail; \ - /* End of string1 => advance to string2. */ \ - d = string2; \ - dend = end_match_2; \ - } - - -/* Test if at very beginning or at very end of the virtual concatenation - of `string1' and `string2'. If only one string, it's `string2'. */ -#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) -#define AT_STRINGS_END(d) ((d) == end2) - - -/* Test if D points to a character which is word-constituent. We have - two special cases to check for: if past the end of string1, look at - the first character in string2; and if before the beginning of - string2, look at the last character in string1. */ -#define WORDCHAR_P(d) \ - (SYNTAX ((d) == end1 ? *string2 \ - : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ - == Sword) - -/* Test if the character before D and the one at D differ with respect - to being word-constituent. 
*/ -#define AT_WORD_BOUNDARY(d) \ - (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ - || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) - - -/* Free everything we malloc. */ -#ifdef REGEX_MALLOC -#define FREE_VAR(var) if (var) free (var); var = NULL -#define FREE_VARIABLES() \ - do { \ - FREE_VAR (fail_stack.stack); \ - FREE_VAR (regstart); \ - FREE_VAR (regend); \ - FREE_VAR (old_regstart); \ - FREE_VAR (old_regend); \ - FREE_VAR (best_regstart); \ - FREE_VAR (best_regend); \ - FREE_VAR (reg_info); \ - FREE_VAR (reg_dummy); \ - FREE_VAR (reg_info_dummy); \ - } while (0) -#else /* not REGEX_MALLOC */ -/* Some MIPS systems (at least) want this to free alloca'd storage. */ -#define FREE_VARIABLES() alloca (0) -#endif /* not REGEX_MALLOC */ - - -/* These values must meet several constraints. They must not be valid - register values; since we have a limit of 255 registers (because - we use only one byte in the pattern for the register number), we can - use numbers larger than 255. They must differ by 1, because of - NUM_FAILURE_ITEMS above. And the value for the lowest register must - be larger than the value for the highest register, so we do not try - to actually save any registers when none are active. */ -#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) -#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) - -/* Matching routines. */ - -#ifndef emacs /* Emacs never uses this. */ -/* re_match is like re_match_2 except it takes only a single string. */ - -int -re_match (bufp, string, size, pos, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, pos; - struct re_registers *regs; - { - return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); -} -#endif /* not emacs */ - - -/* re_match_2 matches the compiled pattern in BUFP against the - (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 - and SIZE2, respectively). We start matching at POS, and stop - matching at STOP. 
- - If REGS is non-null and the `no_sub' field of BUFP is nonzero, we - store offsets for the substring each group matched in REGS. See the - documentation for exactly how many groups we fill. - - We return -1 if no match, -2 if an internal error (such as the - failure stack overflowing). Otherwise, we return the length of the - matched substring. */ - -int -re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; -{ - /* General temporaries. */ - int mcnt; - unsigned char *p1; - - /* Just past the end of the corresponding string. */ - const char *end1, *end2; - - /* Pointers into string1 and string2, just past the last characters in - each to consider matching. */ - const char *end_match_1, *end_match_2; - - /* Where we are in the data, and the end of the current string. */ - const char *d, *dend; - - /* Where we are in the pattern, and the end of the pattern. */ - unsigned char *p = bufp->buffer; - register unsigned char *pend = p + bufp->used; - - /* We use this to map every character in the string. */ - char *translate = bufp->translate; - - /* Failure point stack. Each place that can handle a failure further - down the line pushes a failure point on this stack. It consists of - restart, regend, and reg_info for all registers corresponding to - the subexpressions we're currently inside, plus the number of such - registers, and, finally, two char *'s. The first char * is where - to resume scanning the pattern; the second one is where to resume - scanning the strings. If the latter is zero, the failure point is - a ``dummy''; if a failure happens and the failure point is a dummy, - it gets discarded and the next next one is tried. 
*/ - fail_stack_type fail_stack; -#ifdef DEBUG - static unsigned failure_id = 0; - unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; -#endif - - /* We fill all the registers internally, independent of what we - return, for use in backreferences. The number here includes - an element for register zero. */ - unsigned num_regs = bufp->re_nsub + 1; - - /* The currently active registers. */ - unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; - unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; - - /* Information on the contents of registers. These are pointers into - the input strings; they record just what was matched (on this - attempt) by a subexpression part of the pattern, that is, the - regnum-th regstart pointer points to where in the pattern we began - matching and the regnum-th regend points to right after where we - stopped matching the regnum-th subexpression. (The zeroth register - keeps track of what the whole pattern matches.) */ - const char **regstart = NULL, **regend = NULL; - - /* If a group that's operated upon by a repetition operator fails to - match anything, then the register for its start will need to be - restored because it will have been set to wherever in the string we - are when we last see its open-group operator. Similarly for a - register's end. */ - const char **old_regstart = NULL, **old_regend = NULL; - - /* The is_active field of reg_info helps us keep track of which (possibly - nested) subexpressions we are currently in. The matched_something - field of reg_info[reg_num] helps us tell whether or not we have - matched any of the pattern so far this time through the reg_num-th - subexpression. These two fields get reset each time through any - loop their register is in. */ - register_info_type *reg_info = NULL; - - /* The following record the register info as found in the above - variables when we find a match better than any we've seen before. 
- This happens as we backtrack through the failure points, which in - turn happens only if we have not yet matched the entire string. */ - unsigned best_regs_set = false; - const char **best_regstart = NULL, **best_regend = NULL; - - /* Logically, this is `best_regend[0]'. But we don't want to have to - allocate space for that if we're not allocating space for anything - else (see below). Also, we never need info about register 0 for - any of the other register vectors, and it seems rather a kludge to - treat `best_regend' differently than the rest. So we keep track of - the end of the best match so far in a separate variable. We - initialize this to NULL so that when we backtrack the first time - and need to test it, it's not garbage. */ - const char *match_end = NULL; - - /* Used when we pop values we don't care about. */ - const char **reg_dummy = NULL; - register_info_type *reg_info_dummy = NULL; - -#ifdef DEBUG - /* Counts the total number of registers pushed. */ - unsigned num_regs_pushed = 0; -#endif - - DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); - - INIT_FAIL_STACK (); - - /* Do not bother to initialize all the register variables if there are - no groups in the pattern, as it takes a fair amount of time. If - there are groups, we include space for register 0 (the whole - pattern), even though we never use it, since it simplifies the - array indexing. We should fix this. 
*/ - if (bufp->re_nsub) - { - regstart = REGEX_TALLOC (num_regs, const char *); - regend = REGEX_TALLOC (num_regs, const char *); - old_regstart = REGEX_TALLOC (num_regs, const char *); - old_regend = REGEX_TALLOC (num_regs, const char *); - best_regstart = REGEX_TALLOC (num_regs, const char *); - best_regend = REGEX_TALLOC (num_regs, const char *); - reg_info = REGEX_TALLOC (num_regs, register_info_type); - reg_dummy = REGEX_TALLOC (num_regs, const char *); - reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); - - if (!(regstart && regend && old_regstart && old_regend && reg_info - && best_regstart && best_regend && reg_dummy && reg_info_dummy)) - { - FREE_VARIABLES (); - return -2; - } - } -#ifdef REGEX_MALLOC - else - { - /* We must initialize all our variables to NULL, so that - `FREE_VARIABLES' doesn't try to free them. */ - regstart = regend = old_regstart = old_regend = best_regstart - = best_regend = reg_dummy = NULL; - reg_info = reg_info_dummy = (register_info_type *) NULL; - } -#endif /* REGEX_MALLOC */ - - /* The starting position is bogus. */ - if (pos < 0 || pos > size1 + size2) - { - FREE_VARIABLES (); - return -1; - } - - /* Initialize subexpression text positions to -1 to mark ones that no - start_memory/stop_memory has been seen for. Also initialize the - register information struct. */ - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = regend[mcnt] - = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; - - REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; - IS_ACTIVE (reg_info[mcnt]) = 0; - MATCHED_SOMETHING (reg_info[mcnt]) = 0; - EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; - } - - /* We move `string1' into `string2' if the latter's empty -- but not if - `string1' is null. */ - if (size2 == 0 && string1 != NULL) - { - string2 = string1; - size2 = size1; - string1 = 0; - size1 = 0; - } - end1 = string1 + size1; - end2 = string2 + size2; - - /* Compute where to stop matching, within the two strings. 
*/ - if (stop <= size1) - { - end_match_1 = string1 + stop; - end_match_2 = string2; - } - else - { - end_match_1 = end1; - end_match_2 = string2 + stop - size1; - } - - /* `p' scans through the pattern as `d' scans through the data. - `dend' is the end of the input string that `d' points within. `d' - is advanced into the following input string whenever necessary, but - this happens before fetching; therefore, at the beginning of the - loop, `d' can be pointing at the end of a string, but it cannot - equal `string2'. */ - if (size1 > 0 && pos <= size1) - { - d = string1 + pos; - dend = end_match_1; - } - else - { - d = string2 + pos - size1; - dend = end_match_2; - } - - DEBUG_PRINT1 ("The compiled pattern is: "); - DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); - DEBUG_PRINT1 ("The string to match is: `"); - DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); - DEBUG_PRINT1 ("'\n"); - - /* This loops over pattern commands. It exits by returning from the - function if the match is complete, or it drops through if the match - fails at this starting point in the input data. */ - for (;;) - { - DEBUG_PRINT2 ("\n0x%x: ", p); - - if (p == pend) - { /* End of pattern means we might have succeeded. */ - DEBUG_PRINT1 ("end of pattern ... "); - - /* If we haven't matched the entire string, and we want the - longest match, try backtracking. */ - if (d != end_match_2) - { - DEBUG_PRINT1 ("backtracking.\n"); - - if (!FAIL_STACK_EMPTY ()) - { /* More failure points to try. */ - boolean same_str_p = (FIRST_STRING_P (match_end) - == MATCHING_IN_FIRST_STRING); - - /* If exceeds best match so far, save it. 
*/ - if (!best_regs_set - || (same_str_p && d > match_end) - || (!same_str_p && !MATCHING_IN_FIRST_STRING)) - { - best_regs_set = true; - match_end = d; - - DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - best_regstart[mcnt] = regstart[mcnt]; - best_regend[mcnt] = regend[mcnt]; - } - } - goto fail; - } - - /* If no failure points, don't restore garbage. */ - else if (best_regs_set) - { - restore_best_regs: - /* Restore best match. It may happen that `dend == - end_match_1' while the restored d is in string2. - For example, the pattern `x.*y.*z' against the - strings `x-' and `y-z-', if the two strings are - not consecutive in memory. */ - DEBUG_PRINT1 ("Restoring best registers.\n"); - - d = match_end; - dend = ((d >= string1 && d <= end1) - ? end_match_1 : end_match_2); - - for (mcnt = 1; mcnt < num_regs; mcnt++) - { - regstart[mcnt] = best_regstart[mcnt]; - regend[mcnt] = best_regend[mcnt]; - } - } - } /* d != end_match_2 */ - - DEBUG_PRINT1 ("Accepting match.\n"); - - /* If caller wants register contents data back, do it. */ - if (regs && !bufp->no_sub) - { - /* Have the register data arrays been allocated? */ - if (bufp->regs_allocated == REGS_UNALLOCATED) - { /* No. So allocate them with malloc. We need one - extra element beyond `num_regs' for the `-1' marker - GNU code uses. */ - regs->num_regs = MAX (RE_NREGS, num_regs + 1); - regs->start = TALLOC (regs->num_regs, regoff_t); - regs->end = TALLOC (regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - bufp->regs_allocated = REGS_REALLOCATE; - } - else if (bufp->regs_allocated == REGS_REALLOCATE) - { /* Yes. If we need more elements than were already - allocated, reallocate them. If we need fewer, just - leave it alone. 
*/ - if (regs->num_regs < num_regs + 1) - { - regs->num_regs = num_regs + 1; - RETALLOC (regs->start, regs->num_regs, regoff_t); - RETALLOC (regs->end, regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - return -2; - } - } - else - assert (bufp->regs_allocated == REGS_FIXED); - - /* Convert the pointer data in `regstart' and `regend' to - indices. Register zero has to be set differently, - since we haven't kept track of any info for it. */ - if (regs->num_regs > 0) - { - regs->start[0] = pos; - regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 - : d - string2 + size1); - } - - /* Go through the first `min (num_regs, regs->num_regs)' - registers, since that is all we initialized. */ - for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) - { - if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) - regs->start[mcnt] = regs->end[mcnt] = -1; - else - { - regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); - regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); - } - } - - /* If the regs structure we return has more elements than - were in the pattern, set the extra elements to -1. If - we (re)allocated the registers, this is the case, - because we always allocate enough to have at least one - -1 at the end. */ - for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; - } /* regs && !bufp->no_sub */ - - FREE_VARIABLES (); - DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", - nfailure_points_pushed, nfailure_points_popped, - nfailure_points_pushed - nfailure_points_popped); - DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); - - mcnt = d - pos - (MATCHING_IN_FIRST_STRING - ? string1 - : string2 - size1); - - DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); - - return mcnt; - } - - /* Otherwise match next pattern command. */ -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif - { - /* Ignore these. 
Used to ignore the n of succeed_n's which - currently have n == 0. */ - case no_op: - DEBUG_PRINT1 ("EXECUTING no_op.\n"); - break; - - - /* Match the next n pattern characters exactly. The following - byte in the pattern defines n, and the n bytes after that - are the characters to match. */ - case exactn: - mcnt = *p++; - DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); - - /* This is written out as an if-else so we don't waste time - testing `translate' inside the loop. */ - if (translate) - { - do - { - PREFETCH (); - if (translate[(unsigned char) *d++] != (char) *p++) - goto fail; - } - while (--mcnt); - } - else - { - do - { - PREFETCH (); - if (*d++ != (char) *p++) goto fail; - } - while (--mcnt); - } - SET_REGS_MATCHED (); - break; - - - /* Match any character except possibly a newline or a null. */ - case anychar: - DEBUG_PRINT1 ("EXECUTING anychar.\n"); - - PREFETCH (); - - if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') - || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) - goto fail; - - SET_REGS_MATCHED (); - DEBUG_PRINT2 (" Matched `%d'.\n", *d); - d++; - break; - - - case charset: - case charset_not: - { - register unsigned char c; - boolean not = (re_opcode_t) *(p - 1) == charset_not; - - DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); - - PREFETCH (); - c = TRANSLATE (*d); /* The character to match. */ - - /* Cast to `unsigned' instead of `unsigned char' in case the - bit list is a full 32 bytes long. */ - if (c < (unsigned) (*p * BYTEWIDTH) - && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - p += 1 + *p; - - if (!not) goto fail; - - SET_REGS_MATCHED (); - d++; - break; - } - - - /* The beginning of a group is represented by start_memory. - The arguments are the register number in the next byte, and the - number of groups inner to this one in the next. The text - matched within the group is recorded (in the internal - registers data structure) under the register number. 
*/ - case start_memory: - DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); - - /* Find out if this group can match the empty string. */ - p1 = p; /* To send to group_match_null_string_p. */ - - if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[*p]) - = group_match_null_string_p (&p1, pend, reg_info); - - /* Save the position in the string where we were the last time - we were at this open-group operator in case the group is - operated upon by a repetition operator, e.g., with `(a*)*b' - against `ab'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regstart[*p]) ? d : regstart[*p] - : regstart[*p]; - DEBUG_PRINT2 (" old_regstart: %d\n", - POINTER_TO_OFFSET (old_regstart[*p])); - - regstart[*p] = d; - DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); - - IS_ACTIVE (reg_info[*p]) = 1; - MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* This is the new highest active register. */ - highest_active_reg = *p; - - /* If nothing was active before, this is the new lowest active - register. */ - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *p; - - /* Move past the register number and inner group count. */ - p += 2; - break; - - - /* The stop_memory opcode represents the end of a group. Its - arguments are the same as start_memory's: the register - number, and the number of inner groups. */ - case stop_memory: - DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); - - /* We need to save the string position the last time we were at - this close-group operator in case the group is operated - upon by a repetition operator, e.g., with `((a*)*(b*)*)*' - against `aba'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regend[*p]) ? 
d : regend[*p] - : regend[*p]; - DEBUG_PRINT2 (" old_regend: %d\n", - POINTER_TO_OFFSET (old_regend[*p])); - - regend[*p] = d; - DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); - - /* This register isn't active anymore. */ - IS_ACTIVE (reg_info[*p]) = 0; - - /* If this was the only register active, nothing is active - anymore. */ - if (lowest_active_reg == highest_active_reg) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - { /* We must scan for the new highest active register, since - it isn't necessarily one less than now: consider - (a(b)c(d(e)f)g). When group 3 ends, after the f), the - new highest active register is 1. */ - unsigned char r = *p - 1; - while (r > 0 && !IS_ACTIVE (reg_info[r])) - r--; - - /* If we end up at register zero, that means that we saved - the registers as the result of an `on_failure_jump', not - a `start_memory', and we jumped to past the innermost - `stop_memory'. For example, in ((.)*) we save - registers 1 and 2 as a result of the *, but when we pop - back to the second ), we are at the stop_memory 1. - Thus, nothing is active. */ - if (r == 0) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - highest_active_reg = r; - } - - /* If just failed to match something this time around with a - group that's operated on by a repetition operator, try to - force exit from the ``loop'', and restore the register - information for this group that we had before trying this - last match. 
*/ - if ((!MATCHED_SOMETHING (reg_info[*p]) - || (re_opcode_t) p[-3] == start_memory) - && (p + 2) < pend) - { - boolean is_a_jump_n = false; - - p1 = p + 2; - mcnt = 0; - switch ((re_opcode_t) *p1++) - { - case jump_n: - is_a_jump_n = true; - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (is_a_jump_n) - p1 += 2; - break; - - default: - /* do nothing */ ; - } - p1 += mcnt; - - /* If the next operation is a jump backwards in the pattern - to an on_failure_jump right before the start_memory - corresponding to this stop_memory, exit from the loop - by forcing a failure after pushing on the stack the - on_failure_jump's jump in the pattern, and d. */ - if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump - && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) - { - /* If this group ever matched anything, then restore - what its registers were before trying this last - failed match, e.g., with `(a*)*b' against `ab' for - regstart[1], and, e.g., with `((a*)*(b*)*)*' - against `aba' for regend[3]. - - Also restore the registers for inner groups for, - e.g., `((a*)(b*))*' against `aba' (register 3 would - otherwise get trashed). */ - - if (EVER_MATCHED_SOMETHING (reg_info[*p])) - { - unsigned r; - - EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* Restore this and inner groups' (if any) registers. */ - for (r = *p; r < *p + *(p + 1); r++) - { - regstart[r] = old_regstart[r]; - - /* xx why this test? */ - if ((int) old_regend[r] >= (int) regstart[r]) - regend[r] = old_regend[r]; - } - } - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - PUSH_FAILURE_POINT (p1 + mcnt, d, -2); - - goto fail; - } - } - - /* Move past the register number and the inner group count. */ - p += 2; - break; - - - /* \<digit> has been turned into a `duplicate' command which is - followed by the numeric value of <digit> as the register number. 
*/ - case duplicate: - { - register const char *d2, *dend2; - int regno = *p++; /* Get which register to match against. */ - DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); - - /* Can't back reference a group which we've never matched. */ - if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) - goto fail; - - /* Where in input to try to start matching. */ - d2 = regstart[regno]; - - /* Where to stop matching; if both the place to start and - the place to stop matching are in the same string, then - set to the place to stop, otherwise, for now have to use - the end of the first string. */ - - dend2 = ((FIRST_STRING_P (regstart[regno]) - == FIRST_STRING_P (regend[regno])) - ? regend[regno] : end_match_1); - for (;;) - { - /* If necessary, advance to next segment in register - contents. */ - while (d2 == dend2) - { - if (dend2 == end_match_2) break; - if (dend2 == regend[regno]) break; - - /* End of string1 => advance to string2. */ - d2 = string2; - dend2 = regend[regno]; - } - /* At end of register contents => success */ - if (d2 == dend2) break; - - /* If necessary, advance to next segment in data. */ - PREFETCH (); - - /* How many characters left in this segment to match. */ - mcnt = dend - d; - - /* Want how many consecutive characters we can match in - one shot, so, if necessary, adjust the count. */ - if (mcnt > dend2 - d2) - mcnt = dend2 - d2; - - /* Compare that many; failure if mismatch, else move - past them. */ - if (translate - ? bcmp_translate (d, d2, mcnt, translate) - : bcmp (d, d2, mcnt)) - goto fail; - d += mcnt, d2 += mcnt; - } - } - break; - - - /* begline matches the empty string at the beginning of the string - (unless `not_bol' is set in `bufp'), and, if - `newline_anchor' is set, after newlines. */ - case begline: - DEBUG_PRINT1 ("EXECUTING begline.\n"); - - if (AT_STRINGS_BEG (d)) - { - if (!bufp->not_bol) break; - } - else if (d[-1] == '\n' && bufp->newline_anchor) - { - break; - } - /* In all other cases, we fail. 
*/ - goto fail; - - - /* endline is the dual of begline. */ - case endline: - DEBUG_PRINT1 ("EXECUTING endline.\n"); - - if (AT_STRINGS_END (d)) - { - if (!bufp->not_eol) break; - } - - /* We have to ``prefetch'' the next character. */ - else if ((d == end1 ? *string2 : *d) == '\n' - && bufp->newline_anchor) - { - break; - } - goto fail; - - - /* Match at the very beginning of the data. */ - case begbuf: - DEBUG_PRINT1 ("EXECUTING begbuf.\n"); - if (AT_STRINGS_BEG (d)) - break; - goto fail; - - - /* Match at the very end of the data. */ - case endbuf: - DEBUG_PRINT1 ("EXECUTING endbuf.\n"); - if (AT_STRINGS_END (d)) - break; - goto fail; - - - /* on_failure_keep_string_jump is used to optimize `.*\n'. It - pushes NULL as the value for the string on the stack. Then - `pop_failure_point' will keep the current value for the - string, instead of restoring it. To see why, consider - matching `foo\nbar' against `.*\n'. The .* matches the foo; - then the . fails against the \n. But the next thing we want - to do is match the \n against the \n; if we restored the - string value, we would be back at the foo. - - Because this is used only in specific cases, we don't need to - check all the things that `on_failure_jump' does, to make - sure the right things get saved on the stack. Hence we don't - share its code. The only reason to push anything on the - stack at all is that otherwise we would have to change - `anychar's code to do something besides goto fail in this - case; that seems worse than this. */ - case on_failure_keep_string_jump: - DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); - - PUSH_FAILURE_POINT (p + mcnt, NULL, -2); - break; - - - /* Uses of on_failure_jump: - - Each alternative starts with an on_failure_jump that points - to the beginning of the next alternative. 
Each alternative - except the last ends with a jump that in effect jumps past - the rest of the alternatives. (They really jump to the - ending jump of the following alternative, because tensioning - these jumps is a hassle.) - - Repeats start with an on_failure_jump that points past both - the repetition text and either the following jump or - pop_failure_jump back to this on_failure_jump. */ - case on_failure_jump: - on_failure: - DEBUG_PRINT1 ("EXECUTING on_failure_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); - - /* If this on_failure_jump comes right before a group (i.e., - the original * applied to a group), save the information - for that group and all inner ones, so that if we fail back - to this point, the group's information will be correct. - For example, in \(a*\)*\1, we need the preceding group, - and in \(\(a*\)b*\)\2, we need the inner group. */ - - /* We can't use `p' to check ahead because we push - a failure point to `p + mcnt' after we do this. */ - p1 = p; - - /* We need to skip no_op's before we look for the - start_memory in case this on_failure_jump is happening as - the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 - against aba. */ - while (p1 < pend && (re_opcode_t) *p1 == no_op) - p1++; - - if (p1 < pend && (re_opcode_t) *p1 == start_memory) - { - /* We have a new highest active register now. This will - get reset at the start_memory we are about to get to, - but we will have saved all the registers relevant to - this repetition op, as described above. */ - highest_active_reg = *(p1 + 1) + *(p1 + 2); - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *(p1 + 1); - } - - DEBUG_PRINT1 (":\n"); - PUSH_FAILURE_POINT (p + mcnt, d, -2); - break; - - - /* A smart repeat ends with `maybe_pop_jump'. - We change it to either `pop_failure_jump' or `jump'. 
*/ - case maybe_pop_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); - { - register unsigned char *p2 = p; - - /* Compare the beginning of the repeat with what in the - pattern follows its end. If we can establish that there - is nothing that they would both match, i.e., that we - would have to backtrack because of (as in, e.g., `a*a') - then we can change to pop_failure_jump, because we'll - never have to backtrack. - - This is not true in the case of alternatives: in - `(a|ab)*' we do need to backtrack to the `ab' alternative - (e.g., if the string was `ab'). But instead of trying to - detect that here, the alternative has put on a dummy - failure point which is what we will end up popping. */ - - /* Skip over open/close-group commands. */ - while (p2 + 2 < pend - && ((re_opcode_t) *p2 == stop_memory - || (re_opcode_t) *p2 == start_memory)) - p2 += 3; /* Skip over args, too. */ - - /* If we're at the end of the pattern, we can change. */ - if (p2 == pend) - { - /* Consider what happens when matching ":\(.*\)" - against ":/". I don't really understand this code - yet. */ - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 - (" End of pattern: change to `pop_failure_jump'.\n"); - } - - else if ((re_opcode_t) *p2 == exactn - || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) - { - register unsigned char c - = *p2 == (unsigned char) endline ? '\n' : p2[2]; - p1 = p + mcnt; - - /* p1[0] ... p1[2] are the `on_failure_jump' corresponding - to the `maybe_finalize_jump' of this case. Examine what - follows. 
*/ - if ((re_opcode_t) p1[3] == exactn && p1[5] != c) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", - c, p1[5]); - } - - else if ((re_opcode_t) p1[3] == charset - || (re_opcode_t) p1[3] == charset_not) - { - int not = (re_opcode_t) p1[3] == charset_not; - - if (c < (unsigned char) (p1[4] * BYTEWIDTH) - && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - /* `not' is equal to 1 if c would match, which means - that we can't change to pop_failure_jump. */ - if (!not) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - } - } - p -= 2; /* Point at relative address again. */ - if ((re_opcode_t) p[-1] != pop_failure_jump) - { - p[-1] = (unsigned char) jump; - DEBUG_PRINT1 (" Match => jump.\n"); - goto unconditional_jump; - } - /* Note fall through. */ - - - /* The end of a simple repeat has a pop_failure_jump back to - its matching on_failure_jump, where the latter will push a - failure point. The pop_failure_jump takes off failure - points put on by this pop_failure_jump's matching - on_failure_jump; we got through the pattern to here from the - matching on_failure_jump, so didn't fail. */ - case pop_failure_jump: - { - /* We need to pass separate storage for the lowest and - highest registers, even though we don't care about the - actual values. Otherwise, we will restore only one - register from the stack, since lowest will == highest in - `pop_failure_point'. */ - unsigned dummy_low_reg, dummy_high_reg; - unsigned char *pdummy; - const char *sdummy; - - DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); - POP_FAILURE_POINT (sdummy, pdummy, - dummy_low_reg, dummy_high_reg, - reg_dummy, reg_dummy, reg_info_dummy); - } - /* Note fall through. */ - - - /* Unconditionally jump (without popping any failure points). */ - case jump: - unconditional_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. 
*/ - DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); - p += mcnt; /* Do the jump. */ - DEBUG_PRINT2 ("(to 0x%x).\n", p); - break; - - - /* We need this opcode so we can detect where alternatives end - in `group_match_null_string_p' et al. */ - case jump_past_alt: - DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); - goto unconditional_jump; - - - /* Normally, the on_failure_jump pushes a failure point, which - then gets popped at pop_failure_jump. We will end up at - pop_failure_jump, also, and with a pattern of, say, `a+', we - are skipping over the on_failure_jump, so we have to push - something meaningless for pop_failure_jump to pop. */ - case dummy_failure_jump: - DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); - /* It doesn't matter what we push for the string here. What - the code at `fail' tests is the value for the pattern. */ - PUSH_FAILURE_POINT (0, 0, -2); - goto unconditional_jump; - - - /* At the end of an alternative, we need to push a dummy failure - point in case we are followed by a `pop_failure_jump', because - we don't want the failure point for the alternative to be - popped. For example, matching `(a|ab)*' against `aab' - requires that we match the `ab' alternative. */ - case push_dummy_failure: - DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); - /* See comments just above at `dummy_failure_jump' about the - two zeroes. */ - PUSH_FAILURE_POINT (0, 0, -2); - break; - - /* Have to succeed matching what follows at least n times. - After that, handle like `on_failure_jump'. */ - case succeed_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); - - assert (mcnt >= 0); - /* Originally, this is how many times we HAVE to succeed. 
*/ - if (mcnt > 0) - { - mcnt--; - p += 2; - STORE_NUMBER_AND_INCR (p, mcnt); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); - } - else if (mcnt == 0) - { - DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); - p[2] = (unsigned char) no_op; - p[3] = (unsigned char) no_op; - goto on_failure; - } - break; - - case jump_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); - - /* Originally, this is how many times we CAN jump. */ - if (mcnt) - { - mcnt--; - STORE_NUMBER (p + 2, mcnt); - goto unconditional_jump; - } - /* If don't have to jump any more, skip over the rest of command. */ - else - p += 4; - break; - - case set_number_at: - { - DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - p1 = p + mcnt; - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); - STORE_NUMBER (p1, mcnt); - break; - } - - case wordbound: - DEBUG_PRINT1 ("EXECUTING wordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - break; - goto fail; - - case notwordbound: - DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - goto fail; - break; - - case wordbeg: - DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); - if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) - break; - goto fail; - - case wordend: - DEBUG_PRINT1 ("EXECUTING wordend.\n"); - if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) - && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) - break; - goto fail; - -#ifdef emacs -#ifdef emacs19 - case before_dot: - DEBUG_PRINT1 ("EXECUTING before_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) >= point) - goto fail; - break; - - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) != point) - goto fail; - break; - - case after_dot: - DEBUG_PRINT1 ("EXECUTING after_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) <= point) - goto fail; - break; -#else /* not emacs19 */ - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if 
(PTR_CHAR_POS ((unsigned char *) d) + 1 != point) - goto fail; - break; -#endif /* not emacs19 */ - - case syntaxspec: - DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchsyntax; - - case wordchar: - DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); - mcnt = (int) Sword; - matchsyntax: - PREFETCH (); - if (SYNTAX (*d++) != (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - - case notsyntaxspec: - DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchnotsyntax; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); - mcnt = (int) Sword; - matchnotsyntax: - PREFETCH (); - if (SYNTAX (*d++) == (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - -#else /* not emacs */ - case wordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); - PREFETCH (); - if (!WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); - PREFETCH (); - if (WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; -#endif /* not emacs */ - - default: - abort (); - } - continue; /* Successfully executed one pattern command; keep going. */ - - - /* We goto here if a matching operation fails. */ - fail: - if (!FAIL_STACK_EMPTY ()) - { /* A restart point is known. Restore to that state. */ - DEBUG_PRINT1 ("\nFAIL:\n"); - POP_FAILURE_POINT (d, p, - lowest_active_reg, highest_active_reg, - regstart, regend, reg_info); - - /* If this failure point is a dummy, try the next one. */ - if (!p) - goto fail; - - /* If we failed to the end of the pattern, don't examine *p. */ - assert (p <= pend); - if (p < pend) - { - boolean is_a_jump_n = false; - - /* If failed to a backwards jump that's part of a repetition - loop, need to pop this failure point and use the next one. 
*/ - switch ((re_opcode_t) *p) - { - case jump_n: - is_a_jump_n = true; - case maybe_pop_jump: - case pop_failure_jump: - case jump: - p1 = p + 1; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - - if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) - || (!is_a_jump_n - && (re_opcode_t) *p1 == on_failure_jump)) - goto fail; - break; - default: - /* do nothing */ ; - } - } - - if (d >= string1 && d <= end1) - dend = end_match_1; - } - else - break; /* Matching at this starting point really fails. */ - } /* for (;;) */ - - if (best_regs_set) - goto restore_best_regs; - - FREE_VARIABLES (); - - return -1; /* Failure to match. */ -} /* re_match_2 */ - -/* Subroutine definitions for re_match_2. */ - - -/* We are passed P pointing to a register number after a start_memory. - - Return true if the pattern up to the corresponding stop_memory can - match the empty string, and false otherwise. - - If we find the matching stop_memory, sets P to point to one past its number. - Otherwise, sets P to an undefined byte less than or equal to END. - - We don't handle duplicates properly (yet). */ - -static boolean -group_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - /* Point to after the args to the start_memory. */ - unsigned char *p1 = *p + 2; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and return true or - false, as appropriate, when we get to one that can't, or to the - matching stop_memory. */ - - switch ((re_opcode_t) *p1) - { - /* Could be either a loop or a series of alternatives. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - /* If the next operation is not a jump backwards in the - pattern. */ - - if (mcnt >= 0) - { - /* Go through the on_failure_jumps of the alternatives, - seeing if any of the alternatives cannot match nothing. 
- The last alternative starts with only a jump, - whereas the rest start with on_failure_jump and end - with a jump, e.g., here is the pattern for `a|b|c': - - /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 - /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 - /exactn/1/c - - So, we have to first go through the first (n-1) - alternatives and then deal with the last one separately. */ - - - /* Deal with the first (n-1) alternatives, which start - with an on_failure_jump (see above) that jumps to right - past a jump_past_alt. */ - - while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) - { - /* `mcnt' holds how many bytes long the alternative - is, including the ending `jump_past_alt' and - its number. */ - - if (!alt_match_null_string_p (p1, p1 + mcnt - 3, - reg_info)) - return false; - - /* Move to right after this alternative, including the - jump_past_alt. */ - p1 += mcnt; - - /* Break if it's the beginning of an n-th alternative - that doesn't begin with an on_failure_jump. */ - if ((re_opcode_t) *p1 != on_failure_jump) - break; - - /* Still have to check that it's not an n-th - alternative that starts with an on_failure_jump. */ - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) - { - /* Get to the beginning of the n-th alternative. */ - p1 -= 3; - break; - } - } - - /* Deal with the last alternative: go back and get number - of the `jump_past_alt' just before it. `mcnt' contains - the length of the alternative. */ - EXTRACT_NUMBER (mcnt, p1 - 2); - - if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) - return false; - - p1 += mcnt; /* Get past the n-th alternative. 
*/ - } /* if mcnt > 0 */ - break; - - - case stop_memory: - assert (p1[1] == **p); - *p = p1 + 2; - return true; - - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return false; -} /* group_match_null_string_p */ - - -/* Similar to group_match_null_string_p, but doesn't deal with alternatives: - It expects P to be the first byte of a single alternative and END one - byte past the last. The alternative can contain groups. */ - -static boolean -alt_match_null_string_p (p, end, reg_info) - unsigned char *p, *end; - register_info_type *reg_info; -{ - int mcnt; - unsigned char *p1 = p; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and break when we get - to one that can't. */ - - switch ((re_opcode_t) *p1) - { - /* It's a loop. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - break; - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return true; -} /* alt_match_null_string_p */ - - -/* Deals with the ops common to group_match_null_string_p and - alt_match_null_string_p. - - Sets P to one after the op and its arguments, if any. */ - -static boolean -common_op_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - boolean ret; - int reg_no; - unsigned char *p1 = *p; - - switch ((re_opcode_t) *p1++) - { - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbeg: - case wordend: - case wordbound: - case notwordbound: -#ifdef emacs - case before_dot: - case at_dot: - case after_dot: +# endif #endif - break; - - case start_memory: - reg_no = *p1; - assert (reg_no > 0 && reg_no <= MAX_REGNUM); - ret = group_match_null_string_p (&p1, end, reg_info); - - /* Have to set this here in case we're checking a group which - contains a group and a back reference to it. 
*/ - - if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; - - if (!ret) - return false; - break; - - /* If this is an optimized succeed_n for zero times, make the jump. */ - case jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (mcnt >= 0) - p1 += mcnt; - else - return false; - break; - - case succeed_n: - /* Get to the number of times to succeed. */ - p1 += 2; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - if (mcnt == 0) - { - p1 -= 4; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - } - else - return false; - break; - - case duplicate: - if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) - return false; - break; - - case set_number_at: - p1 += 4; - - default: - /* All other opcodes mean we cannot match the empty string. */ - return false; - } - - *p = p1; - return true; -} /* common_op_match_null_string_p */ - - -/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN - bytes; nonzero otherwise. */ - -static int -bcmp_translate( - unsigned char *s1, - unsigned char *s2, - int len, - char *translate -) -{ - register unsigned char *p1 = s1, *p2 = s2; - while (len) - { - if (translate[*p1++] != translate[*p2++]) return 1; - len--; - } - return 0; -} - -/* Entry points for GNU code. */ - -/* re_compile_pattern is the GNU regular expression compiler: it - compiles PATTERN (of length SIZE) and puts the result in BUFP. - Returns 0 if the pattern was valid, otherwise an error string. - - Assumes the `allocated' (and perhaps `buffer') and `translate' fields - are set in BUFP on entry. - - We call regex_compile to do the actual compilation. */ - -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - int length; - struct re_pattern_buffer *bufp; -{ - reg_errcode_t ret; - - /* GNU code is written to assume at least RE_NREGS registers will be set - (and at least one extra will be -1). 
*/ - bufp->regs_allocated = REGS_UNALLOCATED; - - /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by - setting no_sub. */ - bufp->no_sub = 0; - - /* Match anchors at newline. */ - bufp->newline_anchor = 1; - - ret = regex_compile (pattern, length, re_syntax_options, bufp); - - return re_error_msg[(int) ret]; -} - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them if this is an Emacs or POSIX compilation. */ - -#if !defined (emacs) && !defined (_POSIX_SOURCE) - -/* BSD has one and only one pattern buffer. */ -static struct re_pattern_buffer re_comp_buf; - -char * -re_comp (s) - const char *s; -{ - reg_errcode_t ret; - - if (!s) - { - if (!re_comp_buf.buffer) - return "No previous regular expression"; - return 0; - } - - if (!re_comp_buf.buffer) - { - re_comp_buf.buffer = (unsigned char *) malloc (200); - if (re_comp_buf.buffer == NULL) - return "Memory exhausted"; - re_comp_buf.allocated = 200; - - re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); - if (re_comp_buf.fastmap == NULL) - return "Memory exhausted"; - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - /* Match anchors at newlines. */ - re_comp_buf.newline_anchor = 1; - - ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); - - /* Yes, we're discarding `const' here. */ - return (char *) re_error_msg[(int) ret]; -} - - -int -re_exec (s) - const char *s; -{ - const int len = strlen (s); - return - 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); -} -#endif /* not emacs and not _POSIX_SOURCE */ - -/* POSIX.2 functions. Don't define these for Emacs. */ - -#ifndef emacs - -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. 
Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' and `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. - - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. - - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for - the return codes and their meanings.) */ - -int -regcomp (preg, pattern, cflags) - regex_t *preg; - const char *pattern; - int cflags; -{ - reg_errcode_t ret; - unsigned syntax - = (cflags & REG_EXTENDED) ? - RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; - - /* regex_compile will allocate the space for the compiled pattern. */ - preg->buffer = 0; - preg->allocated = 0; - - /* Don't bother to use a fastmap when searching. This simplifies the - REG_NEWLINE case: if we used a fastmap, we'd have to put all the - characters after newlines into the fastmap. This way, we just try - every character. */ - preg->fastmap = 0; - - if (cflags & REG_ICASE) - { - unsigned i; - - preg->translate = (char *) malloc (CHAR_SET_SIZE); - if (preg->translate == NULL) - return (int) REG_ESPACE; - - /* Map uppercase characters to corresponding lowercase ones. */ - for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = ISUPPER (i) ? 
tolower (i) : i; - } - else - preg->translate = NULL; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - /* It also changes the matching behavior. */ - preg->newline_anchor = 1; - } - else - preg->newline_anchor = 0; - - preg->no_sub = !!(cflags & REG_NOSUB); - - /* POSIX says a null character in the pattern terminates it, so we - can use strlen here in compiling the pattern. */ - ret = regex_compile (pattern, strlen (pattern), syntax, preg); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) ret = REG_EPAREN; - - return (int) ret; -} - - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *preg; - const char *string; - size_t nmatch; - regmatch_t pmatch[]; - int eflags; -{ - int ret; - struct re_registers regs; - regex_t private_preg; - int len = strlen (string); - boolean want_reg_info = !preg->no_sub && nmatch > 0; - - private_preg = *preg; - - private_preg.not_bol = !!(eflags & REG_NOTBOL); - private_preg.not_eol = !!(eflags & REG_NOTEOL); - - /* The user has told us exactly how many registers to return - information about, via `nmatch'. We have to pass that on to the - matching routines. 
*/ - private_preg.regs_allocated = REGS_FIXED; - - if (want_reg_info) - { - regs.num_regs = nmatch; - regs.start = TALLOC (nmatch, regoff_t); - regs.end = TALLOC (nmatch, regoff_t); - if (regs.start == NULL || regs.end == NULL) - return (int) REG_NOMATCH; - } - - /* Perform the searching operation. */ - ret = re_search (&private_preg, string, len, - /* start: */ 0, /* range: */ len, - want_reg_info ? &regs : (struct re_registers *) 0); - - /* Copy the register information to the POSIX structure. */ - if (want_reg_info) - { - if (ret >= 0) - { - unsigned r; - - for (r = 0; r < nmatch; r++) - { - pmatch[r].rm_so = regs.start[r]; - pmatch[r].rm_eo = regs.end[r]; - } - } - - /* If we needed the temporary register info, free the space now. */ - free (regs.start); - free (regs.end); - } - - /* We want zero return to mean success, unlike `re_search'. */ - return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; -} - - -/* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. We don't use PREG here. */ - -size_t -regerror(int errcode, const regex_t *preg, - char *errbuf, size_t errbuf_size) -{ - const char *msg; - size_t msg_size; - - if (errcode < 0 - || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) - /* Only error codes returned by the rest of the code should be passed - to this routine. If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = re_error_msg[errcode]; - - /* POSIX doesn't require that we do anything in this case, but why - not be nice. */ - if (! msg) - msg = "Success"; - - msg_size = strlen (msg) + 1; /* Includes the null. */ - - if (errbuf_size != 0) - { - if (msg_size > errbuf_size) - { - strncpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; - } - else - strcpy (errbuf, msg); - } - - return msg_size; -} - - -/* Free dynamically allocated space used by PREG.
*/ - -void -regfree (preg) - regex_t *preg; -{ - if (preg->buffer != NULL) - free (preg->buffer); - preg->buffer = NULL; - - preg->allocated = 0; - preg->used = 0; - - if (preg->fastmap != NULL) - free (preg->fastmap); - preg->fastmap = NULL; - preg->fastmap_accurate = 0; - - if (preg->translate != NULL) - free (preg->translate); - preg->translate = NULL; -} - -#endif /* not emacs */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/compat/regex/regex.h b/compat/regex/regex.h index 6eb64f1402..61c9683872 100644 --- a/compat/regex/regex.h +++ b/compat/regex/regex.h @@ -1,70 +1,90 @@ -/* Definitions for data structures and routines for the regular - expression library, version 0.12. +#include <stdio.h> +#include <stddef.h> - Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. +/* Definitions for data structures and routines for the regular + expression library. + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008 + Free Software Foundation, Inc. + This file is part of the GNU C Library. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#ifndef __REGEXP_LIBRARY_H__ -#define __REGEXP_LIBRARY_H__ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ -/* POSIX says that <sys/types.h> must be included (by the caller) before - <regex.h>. */ +#ifndef _REGEX_H +#define _REGEX_H 1 -#ifdef VMS -/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it - should be there. */ +#ifdef HAVE_STDDEF_H #include <stddef.h> #endif +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +#ifndef _LIBC +#define __USE_GNU 1 +#endif + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; /* The following bits are used to determine the regexp syntax we recognize. The set/not-set meanings are chosen so that Emacs syntax remains the value 0. The bits are given in alphabetical order, and the definitions shifted by one from the previous bit; thus, when we add or remove a bit, only one other definition need change. */ -typedef unsigned reg_syntax_t; +typedef unsigned long int reg_syntax_t; +#ifdef __USE_GNU /* If this bit is not set, then \ inside a bracket expression is literal. If set, then such a \ quotes the following character. 
*/ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) +# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) /* If this bit is not set, then + and ? are operators, and \+ and \? are literals. If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) +# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) /* If this bit is set, then character classes are supported. They are: [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) +# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) /* If this bit is set, then ^ and $ are always anchors (outside bracket expressions, of course). If this bit is not set, then it depends: - ^ is an anchor if it is at the beginning of a regular - expression or after an open-group or an alternation operator; - $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because POSIX draft 11.2 says that * etc. in leading positions is undefined. We already implemented a previous draft which made those constructs invalid, though, so we haven't changed the code back. */ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) +# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) /* If this bit is set, then special characters are always special regardless of where they are in the pattern. @@ -72,63 +92,94 @@ typedef unsigned reg_syntax_t; some contexts; otherwise they are ordinary. Specifically, * + ? 
and intervals are only special when not after the beginning, open-group, or alternation operator. */ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) +# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) /* If this bit is set, then *, +, ?, and { cannot be first in an re or immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) +# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) /* If this bit is set, then . matches newline. If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) +# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) /* If this bit is set, then . doesn't match NUL. If not set, then it does. */ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) +# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) /* If this bit is set, nonmatching lists [^...] do not match newline. If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) +# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) /* If this bit is set, either \{...\} or {...} defines an interval, depending on RE_NO_BK_BRACES. If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) +# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) /* If this bit is set, +, ? and | aren't recognized as operators. If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) +# define RE_LIMITED_OPS (RE_INTERVALS << 1) /* If this bit is set, newline is an alternation operator. If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) +# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) /* If this bit is set, then `{...}' defines an interval, and \{ and \} are literals. If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) +# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) /* If this bit is set, (...) defines a group, and \( and \) are literals. 
If not set, \(...\) defines a group, and ( and ) are literals. */ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) +# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) /* If this bit is set, then \<digit> matches <digit>. If not set, then \<digit> is a back-reference. */ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) +# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) /* If this bit is set, then | is an alternation operator, and \| is literal. If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) +# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) /* If this bit is set, then an ending range point collating higher than the starting range point, as in [z-a], is invalid. If not set, then when ending range point collates higher than the starting range point, the range is ignored. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) +# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) /* If this bit is set, then an unmatched ) is ordinary. If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) +# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +# define RE_INVALID_INTERVAL_ORD (RE_NO_GNU_OPS << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. 
*/ +# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + +/* If this bit is set, then \{ cannot be first in a BRE or + immediately after an alternation or begin-group operator. */ +# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) + +/* If this bit is set, then no_sub will be set to 1 during + re_compile_pattern. */ +#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) +#endif /* This global variable defines the particular regexp syntax to use (for some interfaces). When a regexp is compiled, the syntax used is @@ -136,6 +187,7 @@ typedef unsigned reg_syntax_t; already-compiled regexps. */ extern reg_syntax_t re_syntax_options; +#ifdef __USE_GNU /* Define combinations of the above bits for the standard possibilities. (The [[[ comments delimit what gets put into the Texinfo file, so don't delete them!) 
*/ @@ -143,13 +195,22 @@ extern reg_syntax_t re_syntax_options; #define RE_SYNTAX_EMACS 0 #define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INVALID_INTERVAL_ORD) \ + & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS \ + | RE_INVALID_INTERVAL_ORD) #define RE_SYNTAX_GREP \ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ @@ -163,7 +224,8 @@ extern reg_syntax_t re_syntax_options; | RE_NO_BK_VBAR) #define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC @@ -176,7 +238,7 @@ extern reg_syntax_t re_syntax_options; | RE_INTERVALS | RE_NO_EMPTY_RANGES) #define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. 
Actually, this @@ -185,13 +247,13 @@ extern reg_syntax_t re_syntax_options; (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) #define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. */ #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ @@ -202,10 +264,12 @@ extern reg_syntax_t re_syntax_options; /* Maximum number of duplicates an interval can allow. Some systems (erroneously) define this in other header files, but we want our value, so remove any previous define. */ -#ifdef RE_DUP_MAX -#undef RE_DUP_MAX +# ifdef RE_DUP_MAX +# undef RE_DUP_MAX +# endif +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +# define RE_DUP_MAX (0x7fff) #endif -#define RE_DUP_MAX ((1 << 15) - 1) /* POSIX `cflags' bits (i.e., information for `regcomp'). */ @@ -240,18 +304,26 @@ extern reg_syntax_t re_syntax_options; /* Like REG_NOTBOL, except for the end-of-line. */ #define REG_NOTEOL (1 << 1) +/* Use PMATCH[0] to delimit the start and end of the search in the + buffer. */ +#define REG_STARTEND (1 << 2) + /* If any error codes are removed, changed, or added, update the `re_error_msg' table in regex.c. */ typedef enum { +#if defined _XOPEN_SOURCE || defined __USE_XOPEN2K + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + REG_NOERROR = 0, /* Success. 
*/ REG_NOMATCH, /* Didn't find a match (for regexec). */ /* POSIX regcomp return error codes. (In the order listed in the standard.) */ REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Not implemented. */ + REG_ECOLLATE, /* Invalid collating element. */ REG_ECTYPE, /* Invalid character class name. */ REG_EESCAPE, /* Trailing backslash. */ REG_ESUBREG, /* Invalid back reference. */ @@ -275,85 +347,92 @@ typedef enum compiled, the `re_nsub' field is available. All other fields are private to the regex routines. */ +#ifndef RE_TRANSLATE_TYPE +# define __RE_TRANSLATE_TYPE unsigned char * +# ifdef __USE_GNU +# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE +# endif +#endif + +#ifdef __USE_GNU +# define __REPB_PREFIX(name) name +#else +# define __REPB_PREFIX(name) __##name +#endif + struct re_pattern_buffer { -/* [[[begin pattern_buffer]]] */ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ + unsigned char *__REPB_PREFIX(buffer); - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; + /* Number of bytes to which `buffer' points. */ + unsigned long int __REPB_PREFIX(allocated); - /* Number of bytes actually used in `buffer'. */ - unsigned long used; + /* Number of bytes actually used in `buffer'. */ + unsigned long int __REPB_PREFIX(used); - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t __REPB_PREFIX(syntax); - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; + /* Pointer to a fastmap, if any, otherwise zero. 
re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ + char *__REPB_PREFIX(fastmap); - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - char *translate; + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ + __RE_TRANSLATE_TYPE __REPB_PREFIX(translate); - /* Number of subexpressions found by the compiler. */ + /* Number of subexpressions found by the compiler. */ size_t re_nsub; - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. 
*/ - unsigned newline_anchor : 1; - -/* [[[end pattern_buffer]]] */ -}; + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ + unsigned __REPB_PREFIX(can_be_null) : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#ifdef __USE_GNU +# define REGS_UNALLOCATED 0 +# define REGS_REALLOCATE 1 +# define REGS_FIXED 2 +#endif + unsigned __REPB_PREFIX(regs_allocated) : 2; -typedef struct re_pattern_buffer regex_t; + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned __REPB_PREFIX(fastmap_accurate) : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned __REPB_PREFIX(no_sub) : 1; + + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ + unsigned __REPB_PREFIX(not_bol) : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned __REPB_PREFIX(not_eol) : 1; + /* If true, an anchor at a newline matches. */ + unsigned __REPB_PREFIX(newline_anchor) : 1; +}; -/* search.c (search_buffer) in Emacs needs this one opcode value. It is - defined both in `regex.c' and here. */ -#define RE_EXACTN_VALUE 1 +typedef struct re_pattern_buffer regex_t; /* Type for byte offsets within the string. POSIX mandates this. */ typedef int regoff_t; +#ifdef __USE_GNU /* This is the structure we store register match data in. See regex.texinfo for a full description of what registers match. 
*/ struct re_registers @@ -367,8 +446,9 @@ struct re_registers /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, `re_match_2' returns information about at least this many registers the first time a `regs' structure is passed. */ -#ifndef RE_NREGS -#define RE_NREGS 30 +# ifndef RE_NREGS +# define RE_NREGS 30 +# endif #endif @@ -383,38 +463,22 @@ typedef struct /* Declarations for routines. */ -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -#define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -#define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - +#ifdef __USE_GNU /* Sets the current default syntax to SYNTAX, and return the old syntax. You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); /* Compile the regular expression PATTERN, with length LENGTH and syntax given by the global `re_syntax_options', into the buffer BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, int length, - struct re_pattern_buffer *buffer)); +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); /* Compile a fastmap for the compiled pattern in BUFFER; used to accelerate searches. Return 0 if successful and -2 if was an internal error. 
*/ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); /* Search in the string STRING (with length LENGTH) for the pattern @@ -422,31 +486,30 @@ extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); characters. Return the starting position of the match, -1 for no match, or -2 for an internal error. Also return register information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); +extern int re_search (struct re_pattern_buffer *__buffer, const char *__cstring, + int __length, int __start, int __range, + struct re_registers *__regs); /* Like `re_search', but search in the concatenation of STRING1 and STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); /* Like `re_search', but return how many characters in STRING the regexp in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); +extern int re_match (struct re_pattern_buffer *__buffer, const char *__cstring, + int __length, int __start, struct re_registers *__regs); /* Relates to `re_match' as `re_search_2' relates to `re_search'. 
*/ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); /* Set REGS to hold NUM_REGS registers, storing them in STARTS and @@ -461,30 +524,59 @@ extern int re_match_2 Unless this function is called, the first search or match using PATTERN_BUFFER will allocate its own register data, without freeing the old data. */ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); - +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); +#endif /* Use GNU */ + +#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD) +# ifndef _CRAY /* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". */ +#ifndef __restrict +# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) +# if defined restrict || 199901L <= __STDC_VERSION__ +# define __restrict restrict +# else +# define __restrict +# endif +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. */ +#ifndef __restrict_arr +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \ + && !defined __GNUG__ +# define __restrict_arr __restrict +# else +# define __restrict_arr +# endif +#endif /* POSIX compatibility. 
*/ -extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); -extern int regexec - _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags)); -extern size_t regerror - _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, - size_t errbuf_size)); -extern void regfree _RE_ARGS ((regex_t *preg)); - -#endif /* not __REGEXP_LIBRARY_H__ */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); + +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __cstring, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); + +extern size_t regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); + +extern void regfree (regex_t *__preg); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ diff --git a/compat/regex/regex_internal.c b/compat/regex/regex_internal.c new file mode 100644 index 0000000000..193854cf5b --- /dev/null +++ b/compat/regex/regex_internal.c @@ -0,0 +1,1744 @@ +/* Extended regular expression matching and search library. + Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301 USA.  */
+
+static void re_string_construct_common (const char *str, int len,
+                                        re_string_t *pstr,
+                                        RE_TRANSLATE_TYPE trans, int icase,
+                                        const re_dfa_t *dfa) internal_function;
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
+                                          const re_node_set *nodes,
+                                          unsigned int hash) internal_function;
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
+                                          const re_node_set *nodes,
+                                          unsigned int context,
+                                          unsigned int hash) internal_function;
+
+#ifdef GAWK
+#undef MAX /* safety */
+/* Return the larger of A and B.  The result has the operands' type
+   (size_t) so that the maximum is never narrowed to int.  */
+static size_t
+MAX(size_t a, size_t b)
+{
+  return (a > b ? a : b);
+}
+#endif
+
+/* Functions for string operation.  */
+
+/* This function allocates the buffers.  It is necessary to call
+   re_string_reconstruct before using the object.
+
+   PSTR is the string object to set up, STR and LEN describe the raw
+   input, TRANS is an optional translation table, ICASE is nonzero for
+   case-insensitive matching and DFA supplies mb_cur_max, word_char and
+   word_ops_used.  INIT_LEN is the requested initial buffer length; it
+   is raised to DFA->mb_cur_max if smaller, and the buffers are sized
+   to the smaller of LEN + 1 and that value.
+
+   Returns REG_NOERROR, or the error reported by
+   re_string_realloc_buffers.  */
+
+static reg_errcode_t
+internal_function
+re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
+                    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t ret;
+  int init_buf_len;
+
+  /* Ensure at least one character fits into the buffers.  */
+  if (init_len < dfa->mb_cur_max)
+    init_len = dfa->mb_cur_max;
+  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  ret = re_string_realloc_buffers (pstr, init_buf_len);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+
+  pstr->word_char = dfa->word_char;
+  pstr->word_ops_used = dfa->word_ops_used;
+  /* Without a private copy, MBS aliases the caller's string.  */
+  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
+  /* Nothing is valid yet when the bytes must first be copied or
+     converted; otherwise the whole input is usable as is.  */
+  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
+  pstr->valid_raw_len = pstr->valid_len;
+  return REG_NOERROR;
+}
+
+/* This function allocates the buffers and initializes them.
*/
+/* STR and LEN describe the raw input, TRANS is an optional translation
+   table, ICASE is nonzero for case-insensitive matching and DFA
+   supplies mb_cur_max.  PSTR is zeroed before being filled in.
+   Returns REG_NOERROR, or the error code propagated from
+   re_string_realloc_buffers or build_wcs_upper_buffer.  */
+static reg_errcode_t
+internal_function
+re_string_construct (re_string_t *pstr, const char *str, int len,
+                     RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
+{
+  reg_errcode_t ret;
+  memset (pstr, '\0', sizeof (re_string_t));
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  if (len > 0) /* An empty input gets no buffers at all.  */
+    {
+      ret = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (ret != REG_NOERROR, 0))
+        return ret;
+    }
+  /* Without a private copy, MBS aliases the caller's string.  */
+  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
+
+  if (icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+        {
+          while (1) /* Convert, doubling the buffers while needed.  */
+            {
+              ret = build_wcs_upper_buffer (pstr);
+              if (BE (ret != REG_NOERROR, 0))
+                return ret;
+              if (pstr->valid_raw_len >= len) /* All raw input consumed.  */
+                break;
+              /* Stop as well when the builder quit although more than
+                 mb_cur_max elements were still free.  */
+              if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
+                break;
+              ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+              if (BE (ret != REG_NOERROR, 0))
+                return ret;
+            }
+        }
+      else
+#endif /* RE_ENABLE_I18N */
+        build_upper_buffer (pstr);
+    }
+  else
+    {
+#ifdef RE_ENABLE_I18N
+      if (dfa->mb_cur_max > 1)
+        build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N */
+        {
+          if (trans != NULL)
+            re_string_translate_buffer (pstr);
+          else
+            {
+              /* No conversion is needed: mark BUFS_LEN elements valid.  */
+              pstr->valid_len = pstr->bufs_len;
+              pstr->valid_raw_len = pstr->bufs_len;
+            }
+        }
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions for re_string_allocate, and re_string_construct.
+
+   re_string_realloc_buffers resizes the buffers of PSTR to NEW_BUF_LEN
+   elements: WCS (and OFFSETS, if it already exists) when RE_ENABLE_I18N
+   is defined and PSTR->mb_cur_max > 1, and MBS when PSTR->mbs_allocated
+   is set.  On success BUFS_LEN is updated and REG_NOERROR returned.
+   REG_ESPACE is returned when the size check or a reallocation fails;
+   buffers that were already resized stay resized and BUFS_LEN is left
+   unchanged in that case.  */
+
+static reg_errcode_t
+internal_function
+re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
+{
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      wint_t *new_wcs;
+
+      /* Avoid overflow in realloc: reject a length whose byte count,
+         for the larger of the two element types, exceeds SIZE_MAX.  */
+      const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
+      if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
+        return REG_ESPACE;
+
+      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
+      if (BE (new_wcs == NULL, 0))
+        return REG_ESPACE;
+      pstr->wcs = new_wcs;
+      if (pstr->offsets != NULL) /* OFFSETS is only grown, never created here.  */
+        {
+          int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
+          if (BE (new_offsets == NULL, 0))
+            return REG_ESPACE;
+          pstr->offsets = new_offsets;
+        }
+    }
+#endif /* RE_ENABLE_I18N */
+  if (pstr->mbs_allocated)
+    {
+      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
+                                           new_buf_len);
+      if (BE (new_mbs == NULL, 0))
+        return REG_ESPACE;
+      pstr->mbs = new_mbs;
+    }
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+/* Record in PSTR the settings shared by re_string_allocate and
+   re_string_construct: the raw string STR and its length LEN, the
+   translation table TRANS, the ICASE flag, and the multibyte
+   properties copied from DFA.  No memory is allocated here.  */
+static void
+internal_function
+re_string_construct_common (const char *str, int len, re_string_t *pstr,
+                            RE_TRANSLATE_TYPE trans, int icase,
+                            const re_dfa_t *dfa)
+{
+  pstr->raw_mbs = (const unsigned char *) str;
+  pstr->len = len;
+  pstr->raw_len = len;
+  pstr->trans = trans;
+  pstr->icase = icase ? 1 : 0;
+  /* A private MBS copy is needed whenever the bytes get modified.  */
+  pstr->mbs_allocated = (trans != NULL || icase);
+  pstr->mb_cur_max = dfa->mb_cur_max;
+  pstr->is_utf8 = dfa->is_utf8;
+  pstr->map_notascii = dfa->map_notascii;
+  pstr->stop = pstr->len;
+  pstr->raw_stop = pstr->stop;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+   If the byte sequence of the string is:
+     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+   Then wide character buffer will be:
+     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
+   We use WEOF for padding; it indicates that the position isn't
+   the first byte of a multibyte character.
+
+   Note that this function assumes PSTR->VALID_LEN elements are already
+   built and starts from PSTR->VALID_LEN.
*/
+/* PSTR is updated in place (WCS, MBS when translating, CUR_STATE,
+   VALID_LEN and VALID_RAW_LEN); nothing is returned.  */
+static void
+internal_function
+build_wcs_buffer (re_string_t *pstr)
+{
+#ifdef _LIBC
+  unsigned char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  unsigned char buf[64]; /* NOTE(review): no check here that mb_cur_max
+                            fits in 64 bytes -- presumably guaranteed
+                            elsewhere; confirm.  */
+#endif
+  mbstate_t prev_st;
+  int byte_idx, end_idx, remain_len;
+  size_t mbclen;
+
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len, whichever is smaller.  */
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+    {
+      wchar_t wc;
+      const char *p;
+
+      remain_len = end_idx - byte_idx;
+      prev_st = pstr->cur_state; /* Saved so a failed conversion can be undone.  */
+      /* Apply the translation table, if any, to at most mb_cur_max
+         bytes; the translated bytes go both to BUF (the conversion
+         input) and to PSTR->MBS.  */
+      if (BE (pstr->trans != NULL, 0))
+        {
+          int i, ch;
+
+          for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+            {
+              ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+              buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
+            }
+          p = (const char *) buf;
+        }
+      else
+        p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2, 0))
+        {
+          /* The remaining bytes end in the middle of a character
+             (the buffer doesn't have enough space): stop building.  */
+          pstr->cur_state = prev_st;
+          break;
+        }
+      else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+        {
+          /* Invalid sequence or NUL: treat it as a single-byte
+             character taken from the raw input.  */
+          mbclen = 1;
+          wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+          if (BE (pstr->trans != NULL, 0))
+            wc = pstr->trans[wc];
+          pstr->cur_state = prev_st;
+        }
+
+      /* Write wide character and padding.  */
+      pstr->wcs[byte_idx++] = wc;
+      /* Write paddings: one WEOF per trailing byte of the character.
+         REMAIN_LEN is reused here as the end index of the character.  */
+      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+        pstr->wcs[byte_idx++] = WEOF;
+    }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+   but for REG_ICASE.
*/
+/* PSTR is converted in place, starting at PSTR->VALID_LEN and going at
+   most up to the smaller of PSTR->LEN and PSTR->BUFS_LEN.  Returns
+   REG_NOERROR, or REG_ESPACE when the OFFSETS array cannot be
+   allocated.  */
+static reg_errcode_t
+internal_function
+build_wcs_upper_buffer (re_string_t *pstr)
+{
+  mbstate_t prev_st;
+  int src_idx, byte_idx, end_idx, remain_len;
+  size_t mbclen;
+#ifdef _LIBC
+  char buf[MB_LEN_MAX];
+  assert (MB_LEN_MAX >= pstr->mb_cur_max);
+#else
+  char buf[64];
+#endif
+
+  byte_idx = pstr->valid_len;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  /* The following optimization assumes that ASCII characters can be
+     mapped to wide characters with a simple cast.  It is only taken
+     when there is no translation table and no offsets are in use, so
+     raw and converted indexes are the same (BYTE_IDX serves as both).  */
+  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+    {
+      while (byte_idx < end_idx)
+        {
+          wchar_t wc;
+
+          if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+              && mbsinit (&pstr->cur_state))
+            {
+              /* In case of a singlebyte character.  */
+              pstr->mbs[byte_idx]
+                = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+              /* The next step uses the assumption that wchar_t is encoded
+                 ASCII-safe: all ASCII values can be converted like this.  */
+              pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+              ++byte_idx;
+              continue;
+            }
+
+          remain_len = end_idx - byte_idx;
+          prev_st = pstr->cur_state;
+          mbclen = __mbrtowc (&wc,
+                              ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+                               + byte_idx), remain_len, &pstr->cur_state);
+          /* True unless MBCLEN is 0, (size_t) -1 or (size_t) -2.  */
+          if (BE (mbclen + 2 > 2, 1))
+            {
+              wchar_t wcu = wc;
+              if (iswlower (wc))
+                {
+                  size_t mbcdlen;
+
+                  wcu = towupper (wc);
+                  mbcdlen = wcrtomb (buf, wcu, &prev_st);
+                  if (BE (mbclen == mbcdlen, 1))
+                    memcpy (pstr->mbs + byte_idx, buf, mbclen);
+                  else
+                    {
+                      /* The upper-case form has a different byte length:
+                         continue in the general loop below, which keeps
+                         separate source and destination indexes.  */
+                      src_idx = byte_idx;
+                      goto offsets_needed;
+                    }
+                }
+              else
+                memcpy (pstr->mbs + byte_idx,
+                        pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+              pstr->wcs[byte_idx++] = wcu;
+              /* Write paddings.  */
+              for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+                pstr->wcs[byte_idx++] = WEOF;
+            }
+          else if (mbclen == (size_t) -1 || mbclen == 0)
+            {
+              /* It is an invalid character or '\0'.  Just use the byte.  */
+              int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+              pstr->mbs[byte_idx] = ch;
+              /* And also cast it to wide char.  */
+              pstr->wcs[byte_idx++] = (wchar_t) ch;
+              if (BE (mbclen == (size_t) -1, 0))
+                pstr->cur_state = prev_st;
+            }
+          else
+            {
+              /* The buffer doesn't have enough space: stop building.  */
+              pstr->cur_state = prev_st;
+              break;
+            }
+        }
+      pstr->valid_len = byte_idx;
+      pstr->valid_raw_len = byte_idx;
+      return REG_NOERROR;
+    }
+  else
+    /* General loop: SRC_IDX walks the raw input, BYTE_IDX the
+       converted buffers.  */
+    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
+      {
+        wchar_t wc;
+        const char *p;
+      offsets_needed: /* Also entered by goto from the fast path above.  */
+        remain_len = end_idx - byte_idx;
+        prev_st = pstr->cur_state;
+        if (BE (pstr->trans != NULL, 0))
+          {
+            int i, ch;
+
+            for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+              {
+                ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+                buf[i] = pstr->trans[ch];
+              }
+            p = (const char *) buf;
+          }
+        else
+          p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+        mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+        /* True unless MBCLEN is 0, (size_t) -1 or (size_t) -2.  */
+        if (BE (mbclen + 2 > 2, 1))
+          {
+            wchar_t wcu = wc;
+            if (iswlower (wc))
+              {
+                size_t mbcdlen;
+
+                wcu = towupper (wc);
+                mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
+                if (BE (mbclen == mbcdlen, 1))
+                  memcpy (pstr->mbs + byte_idx, buf, mbclen);
+                else if (mbcdlen != (size_t) -1)
+                  {
+                    /* The upper-case form needs MBCDLEN bytes instead of
+                       MBCLEN, so converted and raw positions diverge
+                       and OFFSETS has to map one to the other.  */
+                    size_t i;
+
+                    if (byte_idx + mbcdlen > pstr->bufs_len)
+                      {
+                        /* No room for the converted character: stop so
+                           the caller can enlarge the buffers.  */
+                        pstr->cur_state = prev_st;
+                        break;
+                      }
+
+                    if (pstr->offsets == NULL)
+                      {
+                        pstr->offsets = re_malloc (int, pstr->bufs_len);
+
+                        if (pstr->offsets == NULL)
+                          return REG_ESPACE;
+                      }
+                    if (!pstr->offsets_needed)
+                      {
+                        /* Everything converted so far maps one to one.  */
+                        for (i = 0; i < (size_t) byte_idx; ++i)
+                          pstr->offsets[i] = i;
+                        pstr->offsets_needed = 1;
+                      }
+
+                    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+                    pstr->wcs[byte_idx] = wcu;
+                    pstr->offsets[byte_idx] = src_idx;
+                    for (i = 1; i < mbcdlen; ++i)
+                      {
+                        /* Extra converted bytes all map to the last
+                           byte of the source character.  */
+                        pstr->offsets[byte_idx + i]
+                          = src_idx + (i < mbclen ? i : mbclen - 1);
+                        pstr->wcs[byte_idx + i] = WEOF;
+                      }
+                    /* The converted string changes length by the
+                       difference; adjust LEN, STOP and END_IDX.  */
+                    pstr->len += mbcdlen - mbclen;
+                    if (pstr->raw_stop > src_idx)
+                      pstr->stop += mbcdlen - mbclen;
+                    end_idx = (pstr->bufs_len > pstr->len)
+                              ? pstr->len : pstr->bufs_len;
+                    byte_idx += mbcdlen;
+                    src_idx += mbclen;
+                    continue;
+                  }
+                else
+                  /* wcrtomb failed: keep the original bytes.  */
+                  memcpy (pstr->mbs + byte_idx, p, mbclen);
+              }
+            else
+              memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+            if (BE (pstr->offsets_needed != 0, 0))
+              {
+                size_t i;
+                for (i = 0; i < mbclen; ++i)
+                  pstr->offsets[byte_idx + i] = src_idx + i;
+              }
+            src_idx += mbclen;
+
+            pstr->wcs[byte_idx++] = wcu;
+            /* Write paddings.  */
+            for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+              pstr->wcs[byte_idx++] = WEOF;
+          }
+        else if (mbclen == (size_t) -1 || mbclen == 0)
+          {
+            /* It is an invalid character or '\0'.  Just use the byte.  */
+            int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+            if (BE (pstr->trans != NULL, 0))
+              ch = pstr->trans [ch];
+            pstr->mbs[byte_idx] = ch;
+
+            if (BE (pstr->offsets_needed != 0, 0))
+              pstr->offsets[byte_idx] = src_idx;
+            ++src_idx;
+
+            /* And also cast it to wide char.  */
+            pstr->wcs[byte_idx++] = (wchar_t) ch;
+            if (BE (mbclen == (size_t) -1, 0))
+              pstr->cur_state = prev_st;
+          }
+        else
+          {
+            /* The buffer doesn't have enough space: stop building.  */
+            pstr->cur_state = prev_st;
+            break;
+          }
+      }
+  pstr->valid_len = byte_idx;
+  pstr->valid_raw_len = src_idx;
+  return REG_NOERROR;
+}
+
+/* Skip characters, starting just after the part of the raw string that
+   is already valid, until the raw index is no longer less than
+   NEW_RAW_IDX.  Return the index reached and store in *LAST_WC the last
+   character skipped (WEOF if none was skipped).  */
+
+static int
+internal_function
+re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
+{
+  mbstate_t prev_st;
+  int rawbuf_idx;
+  size_t mbclen;
+  wint_t wc = WEOF;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      wchar_t wc2;
+      /* NOTE(review): RAWBUF_IDX indexes RAW_MBS, while PSTR->LEN is
+         adjusted by build_wcs_upper_buffer when case conversion changes
+         byte lengths -- confirm LEN (rather than RAW_LEN) is the
+         intended bound here.  */
+      int remain_len = pstr->len - rawbuf_idx;
+      prev_st = pstr->cur_state;
+      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
+                          remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+        {
+          /* We treat these cases as a single byte character.  */
+          if (mbclen == 0 || remain_len == 0)
+            wc = L'\0';
+          else
+            wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
+          mbclen = 1;
+          pstr->cur_state = prev_st;
+        }
+      else
+        wc = (wint_t) wc2;
+      /* Then proceed to the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = (wint_t) wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Build the buffer PSTR->MBS, applying the translation table if there
+   is one and converting lower-case bytes to upper case.
+   This function is used in case of REG_ICASE.  It works from
+   PSTR->VALID_LEN up to the smaller of PSTR->LEN and PSTR->BUFS_LEN.  */
+
+static void
+internal_function
+build_upper_buffer (re_string_t *pstr)
+{
+  int char_idx, end_idx;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
+      if (BE (pstr->trans != NULL, 0))
+        ch = pstr->trans[ch]; /* Translate first, then fold case.  */
+      if (islower (ch))
+        pstr->mbs[char_idx] = toupper (ch);
+      else
+        pstr->mbs[char_idx] = ch;
+    }
+  pstr->valid_len = char_idx;
+  pstr->valid_raw_len = char_idx;
+}
+
+/* Apply TRANS to the buffer in PSTR: copy the raw bytes from
+   PSTR->VALID_LEN up to the smaller of PSTR->LEN and PSTR->BUFS_LEN
+   into PSTR->MBS through the translation table.  PSTR->TRANS is
+   dereferenced unconditionally, so it must not be NULL.  */
+
+static void
+internal_function
+re_string_translate_buffer (re_string_t *pstr)
+{
+  int buf_idx, end_idx;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
+      pstr->mbs[buf_idx] = pstr->trans[ch];
+    }
+
+  pstr->valid_len = buf_idx;
+  pstr->valid_raw_len = buf_idx;
+}
+
+/* This function reconstructs the buffers.
+ Concretely, convert to wide character in case of pstr->mb_cur_max > 1, + convert to upper case in case of REG_ICASE, apply translation. */ + +static reg_errcode_t +internal_function +re_string_reconstruct (re_string_t *pstr, int idx, int eflags) +{ + int offset = idx - pstr->raw_mbs_idx; + if (BE (offset < 0, 0)) + { + /* Reset buffer. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); +#endif /* RE_ENABLE_I18N */ + pstr->len = pstr->raw_len; + pstr->stop = pstr->raw_stop; + pstr->valid_len = 0; + pstr->raw_mbs_idx = 0; + pstr->valid_raw_len = 0; + pstr->offsets_needed = 0; + pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF); + if (!pstr->mbs_allocated) + pstr->mbs = (unsigned char *) pstr->raw_mbs; + offset = idx; + } + + if (BE (offset != 0, 1)) + { + /* Should the already checked characters be kept? */ + if (BE (offset < pstr->valid_raw_len, 1)) + { + /* Yes, move them to the front of the buffer. */ +#ifdef RE_ENABLE_I18N + if (BE (pstr->offsets_needed, 0)) + { + int low = 0, high = pstr->valid_len, mid; + do + { + mid = (high + low) / 2; + if (pstr->offsets[mid] > offset) + high = mid; + else if (pstr->offsets[mid] < offset) + low = mid + 1; + else + break; + } + while (low < high); + if (pstr->offsets[mid] < offset) + ++mid; + pstr->tip_context = re_string_context_at (pstr, mid - 1, + eflags); + /* This can be quite complicated, so handle specially + only the common and easy case where the character with + different length representation of lower and upper + case is present at or after offset. 
*/ + if (pstr->valid_len > offset + && mid == offset && pstr->offsets[mid] == offset) + { + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); + memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; + for (low = 0; low < pstr->valid_len; low++) + pstr->offsets[low] = pstr->offsets[low + offset] - offset; + } + else + { + /* Otherwise, just find out how long the partial multibyte + character at offset is and fill it with WEOF/255. */ + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + while (mid > 0 && pstr->offsets[mid - 1] == offset) + --mid; + while (mid < pstr->valid_len) + if (pstr->wcs[mid] != WEOF) + break; + else + ++mid; + if (mid == pstr->valid_len) + pstr->valid_len = 0; + else + { + pstr->valid_len = pstr->offsets[mid] - offset; + if (pstr->valid_len) + { + for (low = 0; low < pstr->valid_len; ++low) + pstr->wcs[low] = WEOF; + memset (pstr->mbs, 255, pstr->valid_len); + } + } + pstr->valid_raw_len = pstr->valid_len; + } + } + else +#endif + { + pstr->tip_context = re_string_context_at (pstr, offset - 1, + eflags); +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + memmove (pstr->mbs, pstr->mbs + offset, + pstr->valid_len - offset); + pstr->valid_len -= offset; + pstr->valid_raw_len -= offset; +#if DEBUG + assert (pstr->valid_len > 0); +#endif + } + } + else + { +#ifdef RE_ENABLE_I18N + /* No, skip all characters until IDX. 
*/ + int prev_valid_len = pstr->valid_len; + + if (BE (pstr->offsets_needed, 0)) + { + pstr->len = pstr->raw_len - idx + offset; + pstr->stop = pstr->raw_stop - idx + offset; + pstr->offsets_needed = 0; + } +#endif + pstr->valid_len = 0; +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + int wcs_idx; + wint_t wc = WEOF; + + if (pstr->is_utf8) + { + const unsigned char *raw, *p, *end; + + /* Special case UTF-8. Multi-byte chars start with any + byte other than 0x80 - 0xbf. */ + raw = pstr->raw_mbs + pstr->raw_mbs_idx; + end = raw + (offset - pstr->mb_cur_max); + if (end < pstr->raw_mbs) + end = pstr->raw_mbs; + p = raw + offset - 1; +#ifdef _LIBC + /* We know the wchar_t encoding is UCS4, so for the simple + case, ASCII characters, skip the conversion step. */ + if (isascii (*p) && BE (pstr->trans == NULL, 1)) + { + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + /* pstr->valid_len = 0; */ + wc = (wchar_t) *p; + } + else +#endif + for (; p >= end; --p) + if ((*p & 0xc0) != 0x80) + { + mbstate_t cur_state; + wchar_t wc2; + int mlen = raw + pstr->len - p; + unsigned char buf[6]; + size_t mbclen; + + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + } + /* XXX Don't use mbrtowc, we know which conversion + to use (UTF-8 -> UCS4). */ + memset (&cur_state, 0, sizeof (cur_state)); + mbclen = __mbrtowc (&wc2, (const char *) p, mlen, + &cur_state); + if (raw + offset - p <= mbclen + && mbclen < (size_t) -2) + { + memset (&pstr->cur_state, '\0', + sizeof (mbstate_t)); + pstr->valid_len = mbclen - (raw + offset - p); + wc = wc2; + } + break; + } + } + + if (wc == WEOF) + pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + if (wc == WEOF) + pstr->tip_context + = re_string_context_at (pstr, prev_valid_len - 1, eflags); + else + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) + ? 
CONTEXT_NEWLINE : 0)); + if (BE (pstr->valid_len, 0)) + { + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; + if (pstr->mbs_allocated) + memset (pstr->mbs, 255, pstr->valid_len); + } + pstr->valid_raw_len = pstr->valid_len; + } + else +#endif /* RE_ENABLE_I18N */ + { + int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; + pstr->valid_raw_len = 0; + if (pstr->trans) + c = pstr->trans[c]; + pstr->tip_context = (bitset_contain (pstr->word_char, c) + ? CONTEXT_WORD + : ((IS_NEWLINE (c) && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); + } + } + if (!BE (pstr->mbs_allocated, 0)) + pstr->mbs += offset; + } + pstr->raw_mbs_idx = idx; + pstr->len -= offset; + pstr->stop -= offset; + + /* Then build the buffers. */ +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + if (pstr->icase) + { + reg_errcode_t ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else + build_wcs_buffer (pstr); + } + else +#endif /* RE_ENABLE_I18N */ + if (BE (pstr->mbs_allocated, 0)) + { + if (pstr->icase) + build_upper_buffer (pstr); + else if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + else + pstr->valid_len = pstr->len; + + pstr->cur_idx = 0; + return REG_NOERROR; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_peek_byte_case (const re_string_t *pstr, int idx) +{ + int ch, off; + + /* Handle the common (easiest) cases first. */ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_peek_byte (pstr, idx); + +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1 + && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) + return re_string_peek_byte (pstr, idx); +#endif + + off = pstr->cur_idx + idx; +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + off = pstr->offsets[off]; +#endif + + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + +#ifdef RE_ENABLE_I18N + /* Ensure that e.g. 
for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I + this function returns CAPITAL LETTER I instead of first byte of + DOTLESS SMALL LETTER I. The latter would confuse the parser, + since peek_byte_case doesn't advance cur_idx in any way. */ + if (pstr->offsets_needed && !isascii (ch)) + return re_string_peek_byte (pstr, idx); +#endif + + return ch; +} + +static unsigned char +internal_function __attribute ((pure)) +re_string_fetch_byte_case (re_string_t *pstr) +{ + if (BE (!pstr->mbs_allocated, 1)) + return re_string_fetch_byte (pstr); + +#ifdef RE_ENABLE_I18N + if (pstr->offsets_needed) + { + int off, ch; + + /* For tr_TR.UTF-8 [[:islower:]] there is + [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip + in that case the whole multi-byte character and return + the original letter. On the other side, with + [[: DOTLESS SMALL LETTER I return [[:I, as doing + anything else would complicate things too much. */ + + if (!re_string_first_byte (pstr, pstr->cur_idx)) + return re_string_fetch_byte (pstr); + + off = pstr->offsets[pstr->cur_idx]; + ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; + + if (! isascii (ch)) + return re_string_fetch_byte (pstr); + + re_string_skip_bytes (pstr, + re_string_char_size_at (pstr, pstr->cur_idx)); + return ch; + } +#endif + + return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; +} + +static void +internal_function +re_string_destruct (re_string_t *pstr) +{ +#ifdef RE_ENABLE_I18N + re_free (pstr->wcs); + re_free (pstr->offsets); +#endif /* RE_ENABLE_I18N */ + if (pstr->mbs_allocated) + re_free (pstr->mbs); +} + +/* Return the context at IDX in INPUT. */ + +static unsigned int +internal_function +re_string_context_at (const re_string_t *input, int idx, int eflags) +{ + int c; + if (BE (idx < 0, 0)) + /* In this case, we use the value stored in input->tip_context, + since we can't know the character in input->mbs[-1] here. */ + return input->tip_context; + if (BE (idx == input->len, 0)) + return ((eflags & REG_NOTEOL) ? 
CONTEXT_ENDBUF + : CONTEXT_NEWLINE | CONTEXT_ENDBUF); +#ifdef RE_ENABLE_I18N + if (input->mb_cur_max > 1) + { + wint_t wc; + int wc_idx = idx; + while(input->wcs[wc_idx] == WEOF) + { +#ifdef DEBUG + /* It must not happen. */ + assert (wc_idx >= 0); +#endif + --wc_idx; + if (wc_idx < 0) + return input->tip_context; + } + wc = input->wcs[wc_idx]; + if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) + return CONTEXT_WORD; + return (IS_WIDE_NEWLINE (wc) && input->newline_anchor + ? CONTEXT_NEWLINE : 0); + } + else +#endif + { + c = re_string_byte_at (input, idx); + if (bitset_contain (input->word_char, c)) + return CONTEXT_WORD; + return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; + } +} + +/* Functions for set operation. */ + +static reg_errcode_t +internal_function +re_node_set_alloc (re_node_set *set, int size) +{ + /* + * ADR: valgrind says size can be 0, which then doesn't + * free the block of size 0. Harumph. This seems + * to work ok, though. + */ + if (size == 0) + { + memset(set, 0, sizeof(*set)); + return REG_NOERROR; + } + set->alloc = size; + set->nelem = 0; + set->elems = re_malloc (int, size); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_1 (re_node_set *set, int elem) +{ + set->alloc = 1; + set->nelem = 1; + set->elems = re_malloc (int, 1); + if (BE (set->elems == NULL, 0)) + { + set->alloc = set->nelem = 0; + return REG_ESPACE; + } + set->elems[0] = elem; + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_2 (re_node_set *set, int elem1, int elem2) +{ + set->alloc = 2; + set->elems = re_malloc (int, 2); + if (BE (set->elems == NULL, 0)) + return REG_ESPACE; + if (elem1 == elem2) + { + set->nelem = 1; + set->elems[0] = elem1; + } + else + { + set->nelem = 2; + if (elem1 < elem2) + { + set->elems[0] = elem1; + set->elems[1] = elem2; + } + else + { + set->elems[0] = elem2; + set->elems[1] = 
elem1; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +re_node_set_init_copy (re_node_set *dest, const re_node_set *src) +{ + dest->nelem = src->nelem; + if (src->nelem > 0) + { + dest->alloc = dest->nelem; + dest->elems = re_malloc (int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + { + dest->alloc = dest->nelem = 0; + return REG_ESPACE; + } + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + } + else + re_node_set_init_empty (dest); + return REG_NOERROR; +} + +/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. + Note: We assume dest->elems is NULL, when dest->alloc is 0. */ + +static reg_errcode_t +internal_function +re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) +{ + int i1, i2, is, id, delta, sbase; + if (src1->nelem == 0 || src2->nelem == 0) + return REG_NOERROR; + + /* We need dest->nelem + 2 * elems_in_intersection; this is a + conservative estimate. */ + if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) + { + int new_alloc = src1->nelem + src2->nelem + dest->alloc; + int *new_elems = re_realloc (dest->elems, int, new_alloc); + if (BE (new_elems == NULL, 0)) + return REG_ESPACE; + dest->elems = new_elems; + dest->alloc = new_alloc; + } + + /* Find the items in the intersection of SRC1 and SRC2, and copy + into the top of DEST those that are not already in DEST itself. */ + sbase = dest->nelem + src1->nelem + src2->nelem; + i1 = src1->nelem - 1; + i2 = src2->nelem - 1; + id = dest->nelem - 1; + for (;;) + { + if (src1->elems[i1] == src2->elems[i2]) + { + /* Try to find the item in DEST. Maybe we could binary search? */ + while (id >= 0 && dest->elems[id] > src1->elems[i1]) + --id; + + if (id < 0 || dest->elems[id] != src1->elems[i1]) + dest->elems[--sbase] = src1->elems[i1]; + + if (--i1 < 0 || --i2 < 0) + break; + } + + /* Lower the highest of the two items. 
*/ + else if (src1->elems[i1] < src2->elems[i2]) + { + if (--i2 < 0) + break; + } + else + { + if (--i1 < 0) + break; + } + } + + id = dest->nelem - 1; + is = dest->nelem + src1->nelem + src2->nelem - 1; + delta = is - sbase + 1; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place; this is more or + less the same loop that is in re_node_set_merge. */ + dest->nelem += delta; + if (delta > 0 && id >= 0) + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + break; + } + } + + /* Copy remaining SRC elements. */ + memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int)); + + return REG_NOERROR; +} + +/* Calculate the union set of the sets SRC1 and SRC2. And store it to + DEST. Return value indicate the error code or REG_NOERROR if succeeded. 
*/
+
+static reg_errcode_t
+internal_function
+re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
+                        const re_node_set *src2)
+{
+  int pos1 = 0, pos2 = 0, out = 0;
+  int have1 = (src1 != NULL && src1->nelem > 0);
+  int have2 = (src2 != NULL && src2->nelem > 0);
+
+  if (!(have1 && have2))
+    {
+      /* At most one operand has elements: the union is a copy of that
+         operand, or the empty set when neither has any.  */
+      if (have1)
+        return re_node_set_init_copy (dest, src1);
+      if (have2)
+        return re_node_set_init_copy (dest, src2);
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  /* Reserve one slot per input element; that is enough even when the
+     operands share nothing.  */
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Two-way merge: emit the smaller head element each round, and emit
+     a value present at both heads only once.  */
+  while (pos1 < src1->nelem && pos2 < src2->nelem)
+    {
+      if (src2->elems[pos2] < src1->elems[pos1])
+        dest->elems[out++] = src2->elems[pos2++];
+      else
+        {
+          /* Equal heads: drop SRC2's copy, SRC1's is stored below.  */
+          if (src1->elems[pos1] == src2->elems[pos2])
+            ++pos2;
+          dest->elems[out++] = src1->elems[pos1++];
+        }
+    }
+
+  /* Only one operand can still have a tail; append it in one go.  */
+  if (pos1 < src1->nelem)
+    {
+      int rest = src1->nelem - pos1;
+      memcpy (dest->elems + out, src1->elems + pos1, rest * sizeof (int));
+      out += rest;
+    }
+  else if (pos2 < src2->nelem)
+    {
+      int rest = src2->nelem - pos2;
+      memcpy (dest->elems + out, src2->elems + pos2, rest * sizeof (int));
+      out += rest;
+    }
+  dest->nelem = out;
+  return REG_NOERROR;
+}
+
+/* Calculate the union set of the sets DEST and SRC.  And store it to
+   DEST.  Return value indicate the error code or REG_NOERROR if succeeded.
*/ + +static reg_errcode_t +internal_function +re_node_set_merge (re_node_set *dest, const re_node_set *src) +{ + int is, id, sbase, delta; + if (src == NULL || src->nelem == 0) + return REG_NOERROR; + if (dest->alloc < 2 * src->nelem + dest->nelem) + { + int new_alloc = 2 * (src->nelem + dest->alloc); + int *new_buffer = re_realloc (dest->elems, int, new_alloc); + if (BE (new_buffer == NULL, 0)) + return REG_ESPACE; + dest->elems = new_buffer; + dest->alloc = new_alloc; + } + + if (BE (dest->nelem == 0, 0)) + { + dest->nelem = src->nelem; + memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); + return REG_NOERROR; + } + + /* Copy into the top of DEST the items of SRC that are not + found in DEST. Maybe we could binary search in DEST? */ + for (sbase = dest->nelem + 2 * src->nelem, + is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; ) + { + if (dest->elems[id] == src->elems[is]) + is--, id--; + else if (dest->elems[id] < src->elems[is]) + dest->elems[--sbase] = src->elems[is--]; + else /* if (dest->elems[id] > src->elems[is]) */ + --id; + } + + if (is >= 0) + { + /* If DEST is exhausted, the remaining items of SRC must be unique. */ + sbase -= is + 1; + memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int)); + } + + id = dest->nelem - 1; + is = dest->nelem + 2 * src->nelem - 1; + delta = is - sbase + 1; + if (delta == 0) + return REG_NOERROR; + + /* Now copy. When DELTA becomes zero, the remaining + DEST elements are already in place. */ + dest->nelem += delta; + for (;;) + { + if (dest->elems[is] > dest->elems[id]) + { + /* Copy from the top. */ + dest->elems[id + delta--] = dest->elems[is--]; + if (delta == 0) + break; + } + else + { + /* Slide from the bottom. */ + dest->elems[id + delta] = dest->elems[id]; + if (--id < 0) + { + /* Copy remaining SRC elements. 
*/ + memcpy (dest->elems, dest->elems + sbase, + delta * sizeof (int)); + break; + } + } + } + + return REG_NOERROR; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have ELEM. + return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert (re_node_set *set, int elem) +{ + int idx; + /* In case the set is empty. */ + if (set->alloc == 0) + { + if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) + return 1; + else + return -1; + } + + if (BE (set->nelem, 0) == 0) + { + /* We already guaranteed above that set->alloc != 0. */ + set->elems[0] = elem; + ++set->nelem; + return 1; + } + + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = set->alloc * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Move the elements which follows the new element. Test the + first element separately to skip a check in the inner loop. */ + if (elem < set->elems[0]) + { + idx = 0; + for (idx = set->nelem; idx > 0; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + else + { + for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) + set->elems[idx] = set->elems[idx - 1]; + } + + /* Insert the new element. */ + set->elems[idx] = elem; + ++set->nelem; + return 1; +} + +/* Insert the new element ELEM to the re_node_set* SET. + SET should not already have any element greater than or equal to ELEM. + Return -1 if an error is occured, return 1 otherwise. */ + +static int +internal_function +re_node_set_insert_last (re_node_set *set, int elem) +{ + /* Realloc if we need. */ + if (set->alloc == set->nelem) + { + int *new_elems; + set->alloc = (set->alloc + 1) * 2; + new_elems = re_realloc (set->elems, int, set->alloc); + if (BE (new_elems == NULL, 0)) + return -1; + set->elems = new_elems; + } + + /* Insert the new element. 
*/ + set->elems[set->nelem++] = elem; + return 1; +} + +/* Compare two node sets SET1 and SET2. + return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_compare (const re_node_set *set1, const re_node_set *set2) +{ + int i; + if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) + return 0; + for (i = set1->nelem ; --i >= 0 ; ) + if (set1->elems[i] != set2->elems[i]) + return 0; + return 1; +} + +/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ + +static int +internal_function __attribute ((pure)) +re_node_set_contains (const re_node_set *set, int elem) +{ + unsigned int idx, right, mid; + if (set->nelem <= 0) + return 0; + + /* Binary search the element. */ + idx = 0; + right = set->nelem - 1; + while (idx < right) + { + mid = (idx + right) / 2; + if (set->elems[mid] < elem) + idx = mid + 1; + else + right = mid; + } + return set->elems[idx] == elem ? idx + 1 : 0; +} + +static void +internal_function +re_node_set_remove_at (re_node_set *set, int idx) +{ + if (idx < 0 || idx >= set->nelem) + return; + --set->nelem; + for (; idx < set->nelem; idx++) + set->elems[idx] = set->elems[idx + 1]; +} + + +/* Add the token TOKEN to dfa->nodes, and return the index of the token. + Or return -1, if an error will be occured. */ + +static int +internal_function +re_dfa_add_node (re_dfa_t *dfa, re_token_t token) +{ + if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) + { + size_t new_nodes_alloc = dfa->nodes_alloc * 2; + int *new_nexts, *new_indices; + re_node_set *new_edests, *new_eclosures; + re_token_t *new_nodes; + + /* Avoid overflows in realloc. 
*/ + const size_t max_object_size = MAX (sizeof (re_token_t), + MAX (sizeof (re_node_set), + sizeof (int))); + if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0)) + return -1; + + new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); + if (BE (new_nodes == NULL, 0)) + return -1; + dfa->nodes = new_nodes; + new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); + new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); + new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); + new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); + if (BE (new_nexts == NULL || new_indices == NULL + || new_edests == NULL || new_eclosures == NULL, 0)) + return -1; + dfa->nexts = new_nexts; + dfa->org_indices = new_indices; + dfa->edests = new_edests; + dfa->eclosures = new_eclosures; + dfa->nodes_alloc = new_nodes_alloc; + } + dfa->nodes[dfa->nodes_len] = token; + dfa->nodes[dfa->nodes_len].constraint = 0; +#ifdef RE_ENABLE_I18N + dfa->nodes[dfa->nodes_len].accept_mb = + (token.type == OP_PERIOD && dfa->mb_cur_max > 1) || token.type == COMPLEX_BRACKET; +#endif + dfa->nexts[dfa->nodes_len] = -1; + re_node_set_init_empty (dfa->edests + dfa->nodes_len); + re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); + return dfa->nodes_len++; +} + +static inline unsigned int +internal_function +calc_state_hash (const re_node_set *nodes, unsigned int context) +{ + unsigned int hash = nodes->nelem + context; + int i; + for (i = 0 ; i < nodes->nelem ; i++) + hash += nodes->elems[i]; + return hash; +} + +/* Search for the state whose node_set is equivalent to NODES. + Return the pointer to the state, if we found it in the DFA. + Otherwise create the new one and return it. In case of an error + return NULL and set the error code in ERR. + Note: - We assume NULL as the invalid state, then it is possible that + return value is NULL and ERR is REG_NOERROR. 
+         - We never return non-NULL value in case of any errors, it is for
+           optimization.  */
+
+static re_dfastate_t *
+internal_function
+re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
+                  const re_node_set *nodes)
+{
+  const struct re_state_table_entry *bucket;
+  re_dfastate_t *fresh;
+  unsigned int hash;
+  int slot;
+
+  /* An empty node set maps to the NULL state; this is not an error.  */
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, 0);
+  bucket = &dfa->state_table[hash & dfa->state_hash_mask];
+
+  /* Walk the bucket; the stored hash is compared first so that the
+     element-wise set comparison only runs for plausible candidates.  */
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *cand = bucket->array[slot];
+      if (cand->hash == hash && re_node_set_compare (&cand->nodes, nodes))
+        return cand;
+    }
+
+  /* No matching state in the table: create one.  ERR is written only
+     when that fails.  */
+  fresh = create_ci_newstate (dfa, nodes, hash);
+  if (BE (fresh == NULL, 0))
+    *err = REG_ESPACE;
+  return fresh;
+}
+
+/* Search for the state whose node_set is equivalent to NODES and
+   whose context is equivalent to CONTEXT.
+   Return the pointer to the state, if we found it in the DFA.
+   Otherwise create the new one and return it.  In case of an error
+   return NULL and set the error code in ERR.
+   Note: - We assume NULL as the invalid state, then it is possible that
+           return value is NULL and ERR is REG_NOERROR.
+         - We never return non-NULL value in case of any errors, it is for
+           optimization.
*/
+
+static re_dfastate_t *
+internal_function
+re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
+                          const re_node_set *nodes, unsigned int context)
+{
+  const struct re_state_table_entry *bucket;
+  re_dfastate_t *fresh;
+  unsigned int hash;
+  int slot;
+
+  /* An empty node set maps to the NULL state; this is not an error.  */
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  /* CONTEXT takes part in the hash, so equal node sets seen under
+     different contexts get different hash values.  */
+  hash = calc_state_hash (nodes, context);
+  bucket = &dfa->state_table[hash & dfa->state_hash_mask];
+
+  for (slot = 0; slot < bucket->num; ++slot)
+    {
+      re_dfastate_t *cand = bucket->array[slot];
+      if (cand->hash != hash || cand->context != context)
+        continue;
+      /* The lookup key is the entrance node set, not CAND's own nodes.  */
+      if (re_node_set_compare (cand->entrance_nodes, nodes))
+        return cand;
+    }
+
+  /* No matching state in `dfa': create one.  ERR is written only when
+     that fails.  */
+  fresh = create_cd_newstate (dfa, nodes, context, hash);
+  if (BE (fresh == NULL, 0))
+    *err = REG_ESPACE;
+  return fresh;
+}
+
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+   HASH put in the appropriate bucket of DFA's state table.  Return value
+   indicates the error code if failed.
*/
+
+static reg_errcode_t
+register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
+                unsigned int hash)
+{
+  struct re_state_table_entry *bucket;
+  int pos;
+
+  newstate->hash = hash;
+
+  /* Build NON_EPS_NODES: the members of NODES whose token type is not
+     an epsilon node, kept in the same order.  Any allocation failure
+     is reported as REG_ESPACE.  */
+  if (BE (re_node_set_alloc (&newstate->non_eps_nodes,
+                             newstate->nodes.nelem) != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (pos = 0; pos < newstate->nodes.nelem; ++pos)
+    {
+      int node_idx = newstate->nodes.elems[pos];
+      if (IS_EPSILON_NODE (dfa->nodes[node_idx].type))
+        continue;
+      if (re_node_set_insert_last (&newstate->non_eps_nodes, node_idx) < 0)
+        return REG_ESPACE;
+    }
+
+  /* Append the state to its hash bucket, growing the bucket when full.  */
+  bucket = &dfa->state_table[hash & dfa->state_hash_mask];
+  if (BE (bucket->alloc <= bucket->num, 0))
+    {
+      int grown = 2 * bucket->num + 2;
+      re_dfastate_t **array = re_realloc (bucket->array, re_dfastate_t *,
+                                          grown);
+      if (BE (array == NULL, 0))
+        return REG_ESPACE;
+      bucket->array = array;
+      bucket->alloc = grown;
+    }
+  bucket->array[bucket->num] = newstate;
+  bucket->num++;
+  return REG_NOERROR;
+}
+
+/* Release STATE together with the node sets and transition tables
+   it points to.  */
+
+static void
+free_state (re_dfastate_t *state)
+{
+  re_node_set *entrance = state->entrance_nodes;
+
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
+  /* ENTRANCE_NODES is a separate allocation only when it does not
+     simply alias STATE->nodes.  */
+  if (entrance != &state->nodes)
+    {
+      re_node_set_free (entrance);
+      re_free (entrance);
+    }
+  re_node_set_free (&state->nodes);
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_free (state);
+}
+
+/* Create the new state which is independent of contexts.
+   Return the new state if succeeded, otherwise return NULL.
*/ + +static re_dfastate_t * +internal_function +create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int hash) +{ + int i; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->entrance_nodes = &newstate->nodes; + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + if (type == CHARACTER && !node->constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + else if (type == ANCHOR || node->constraint) + newstate->has_constraint = 1; + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} + +/* Create the new state which is depend on the context CONTEXT. + Return the new state if succeeded, otherwise return NULL. 
*/ + +static re_dfastate_t * +internal_function +create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int context, unsigned int hash) +{ + int i, nctx_nodes = 0; + reg_errcode_t err; + re_dfastate_t *newstate; + + newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); + if (BE (newstate == NULL, 0)) + return NULL; + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } + + newstate->context = context; + newstate->entrance_nodes = &newstate->nodes; + + for (i = 0 ; i < nodes->nelem ; i++) + { + re_token_t *node = dfa->nodes + nodes->elems[i]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + if (type == CHARACTER && !constraint) + continue; +#ifdef RE_ENABLE_I18N + newstate->accept_mb |= node->accept_mb; +#endif /* RE_ENABLE_I18N */ + + /* If the state has the halt node, the state is a halt state. */ + if (type == END_OF_RE) + newstate->halt = 1; + else if (type == OP_BACK_REF) + newstate->has_backref = 1; + + if (constraint) + { + if (newstate->entrance_nodes == &newstate->nodes) + { + newstate->entrance_nodes = re_malloc (re_node_set, 1); + if (BE (newstate->entrance_nodes == NULL, 0)) + { + free_state (newstate); + return NULL; + } + if (re_node_set_init_copy (newstate->entrance_nodes, nodes) + != REG_NOERROR) + return NULL; + nctx_nodes = 0; + newstate->has_constraint = 1; + } + + if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) + { + re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); + ++nctx_nodes; + } + } + } + err = register_state (dfa, newstate, hash); + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} diff --git a/compat/regex/regex_internal.h b/compat/regex/regex_internal.h new file mode 100644 index 0000000000..4184d7f5a6 --- /dev/null +++ b/compat/regex/regex_internal.h @@ -0,0 +1,810 @@ +/* Extended regular expression matching and 
search library. + Copyright (C) 2002-2005, 2007, 2008, 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _REGEX_INTERNAL_H +#define _REGEX_INTERNAL_H 1 + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC +# include <langinfo.h> +#endif +#if defined HAVE_LOCALE_H || defined _LIBC +# include <locale.h> +#endif +#if defined HAVE_WCHAR_H || defined _LIBC +# include <wchar.h> +#endif /* HAVE_WCHAR_H || _LIBC */ +#if defined HAVE_WCTYPE_H || defined _LIBC +# include <wctype.h> +#endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include <stdbool.h> +#endif /* HAVE_STDBOOL_H || _LIBC */ +#if !defined(ZOS_USS) +#if defined HAVE_STDINT_H || defined _LIBC +# include <stdint.h> +#endif /* HAVE_STDINT_H || _LIBC */ +#endif /* !ZOS_USS */ +#if defined _LIBC +# include <bits/libc-lock.h> +#else +# define __libc_lock_define(CLASS,NAME) +# define __libc_lock_init(NAME) do { } while (0) +# define __libc_lock_lock(NAME) do { } while (0) +# define __libc_lock_unlock(NAME) do { } while (0) +#endif + 
+#ifndef GAWK +/* In case that the system doesn't have isblank(). */ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif +#else /* GAWK */ +/* + * This is a freaking mess. On glibc systems you have to define + * a magic constant to get isblank() out of <ctype.h>, since it's + * a C99 function. To heck with all that and borrow a page from + * dfa.c's book. + */ + +static int +is_blank (int c) +{ + return (c == ' ' || c == '\t'); +} +#endif /* GAWK */ + +#ifdef _LIBC +# ifndef _RE_DEFINE_LOCALE_FUNCTIONS +# define _RE_DEFINE_LOCALE_FUNCTIONS 1 +# include <locale/localeinfo.h> +# include <locale/elem-hash.h> +# include <locale/coll-lookup.h> +# endif +#endif + +/* This is for other GNU distributions with internationalized messages. */ +#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC +# include <libintl.h> +# ifdef _LIBC +# undef gettext +# define gettext(msgid) \ + INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) +# endif +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +# define gettext_noop(String) String +#endif + +/* For loser systems without the definition. */ +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t) -1) +#endif + +#ifndef NO_MBSUPPORT +#include "mbsupport.h" /* gawk */ +#endif +#ifndef MB_CUR_MAX +#define MB_CUR_MAX 1 +#endif + +#if (defined MBS_SUPPORT) || _LIBC +# define RE_ENABLE_I18N +#endif + +#if __GNUC__ >= 3 +# define BE(expr, val) __builtin_expect (expr, val) +#else +# define BE(expr, val) (expr) +# ifdef inline +# undef inline +# endif +# define inline +#endif + +/* Number of single byte character. */ +#define SBC_MAX 256 + +#define COLL_ELEM_LEN_MAX 8 + +/* The character which represents newline. */ +#define NEWLINE_CHAR '\n' +#define WIDE_NEWLINE_CHAR L'\n' + +/* Rename to standard API for using out of glibc. 
*/
+#ifndef _LIBC
+# ifdef __wctype
+#  undef __wctype
+# endif
+# define __wctype wctype
+# ifdef __iswctype
+#  undef __iswctype
+# endif
+# define __iswctype iswctype
+# define __btowc btowc
+# define __mbrtowc mbrtowc
+#undef __mempcpy /* GAWK */
+# define __mempcpy mempcpy
+# define __wcrtomb wcrtomb
+# define __regfree regfree
+# define attribute_hidden
+#endif /* not _LIBC */
+
+#ifdef __GNUC__
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* An integer used to represent a set of bits.  It must be unsigned,
+   and must be at least as wide as unsigned int.  */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t.  */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t.  */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set.  */
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+/* Single-bit operations.  SET is an array of bitset_word_t (or a pointer
+   to its first word) and I is the bit number.  Both parameters are
+   parenthesized in the expansion so that an argument such as `base + off'
+   addresses the intended bit.  I is expanded twice, so it must not have
+   side effects.  */
+#define bitset_set(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] \
+   |= (bitset_word_t) 1 << ((i) % BITSET_WORD_BITS))
+#define bitset_clear(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] \
+   &= ~((bitset_word_t) 1 << ((i) % BITSET_WORD_BITS)))
+#define bitset_contain(set,i) \
+  ((set)[(i) / BITSET_WORD_BITS] \
+   & ((bitset_word_t) 1 << ((i) % BITSET_WORD_BITS)))
+/* Whole-set operations; each one touches sizeof (bitset_t) bytes.  */
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
+
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080 +#define WORD_DELIM_CONSTRAINT 0x0100 +#define NOT_WORD_DELIM_CONSTRAINT 0x0200 + +typedef enum +{ + INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, + WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, + LINE_FIRST = PREV_NEWLINE_CONSTRAINT, + LINE_LAST = NEXT_NEWLINE_CONSTRAINT, + BUF_FIRST = PREV_BEGBUF_CONSTRAINT, + BUF_LAST = NEXT_ENDBUF_CONSTRAINT, + WORD_DELIM = WORD_DELIM_CONSTRAINT, + NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT +} re_context_type; + +typedef struct +{ + int alloc; + int nelem; + int *elems; +} re_node_set; + +typedef enum +{ + NON_TYPE = 0, + + /* Node type, These are used by token, node, tree. */ + CHARACTER = 1, + END_OF_RE = 2, + SIMPLE_BRACKET = 3, + OP_BACK_REF = 4, + OP_PERIOD = 5, +#ifdef RE_ENABLE_I18N + COMPLEX_BRACKET = 6, + OP_UTF8_PERIOD = 7, +#endif /* RE_ENABLE_I18N */ + + /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used + when the debugger shows values of this enum type. */ +#define EPSILON_BIT 8 + OP_OPEN_SUBEXP = EPSILON_BIT | 0, + OP_CLOSE_SUBEXP = EPSILON_BIT | 1, + OP_ALT = EPSILON_BIT | 2, + OP_DUP_ASTERISK = EPSILON_BIT | 3, + ANCHOR = EPSILON_BIT | 4, + + /* Tree type, these are used only by tree. */ + CONCAT = 16, + SUBEXP = 17, + + /* Token type, these are used only by token. */ + OP_DUP_PLUS = 18, + OP_DUP_QUESTION, + OP_OPEN_BRACKET, + OP_CLOSE_BRACKET, + OP_CHARSET_RANGE, + OP_OPEN_DUP_NUM, + OP_CLOSE_DUP_NUM, + OP_NON_MATCH_LIST, + OP_OPEN_COLL_ELEM, + OP_CLOSE_COLL_ELEM, + OP_OPEN_EQUIV_CLASS, + OP_CLOSE_EQUIV_CLASS, + OP_OPEN_CHAR_CLASS, + OP_CLOSE_CHAR_CLASS, + OP_WORD, + OP_NOTWORD, + OP_SPACE, + OP_NOTSPACE, + BACK_SLASH + +} re_token_type_t; + +#ifdef RE_ENABLE_I18N +typedef struct +{ + /* Multibyte characters. */ + wchar_t *mbchars; + + /* Collating symbols. 
*/ +# ifdef _LIBC + int32_t *coll_syms; +# endif + + /* Equivalence classes. */ +# ifdef _LIBC + int32_t *equiv_classes; +# endif + + /* Range expressions. */ +# ifdef _LIBC + uint32_t *range_starts; + uint32_t *range_ends; +# else /* not _LIBC */ + wchar_t *range_starts; + wchar_t *range_ends; +# endif /* not _LIBC */ + + /* Character classes. */ + wctype_t *char_classes; + + /* If this character set is the non-matching list. */ + unsigned int non_match : 1; + + /* # of multibyte characters. */ + int nmbchars; + + /* # of collating symbols. */ + int ncoll_syms; + + /* # of equivalence classes. */ + int nequiv_classes; + + /* # of range expressions. */ + int nranges; + + /* # of character classes. */ + int nchar_classes; +} re_charset_t; +#endif /* RE_ENABLE_I18N */ + +typedef struct +{ + union + { + unsigned char c; /* for CHARACTER */ + re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ +#ifdef RE_ENABLE_I18N + re_charset_t *mbcset; /* for COMPLEX_BRACKET */ +#endif /* RE_ENABLE_I18N */ + int idx; /* for BACK_REF */ + re_context_type ctx_type; /* for ANCHOR */ + } opr; +#if __GNUC__ >= 2 + re_token_type_t type : 8; +#else + re_token_type_t type; +#endif + unsigned int constraint : 10; /* context constraint */ + unsigned int duplicated : 1; + unsigned int opt_subexp : 1; +#ifdef RE_ENABLE_I18N + unsigned int accept_mb : 1; + /* These 2 bits can be moved into the union if needed (e.g. if running out + of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ + unsigned int mb_partial : 1; +#endif + unsigned int word_char : 1; +} re_token_t; + +#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) + +struct re_string_t +{ + /* Indicate the raw buffer which is the original string passed as an + argument of regexec(), re_search(), etc.. */ + const unsigned char *raw_mbs; + /* Store the multibyte string. In case of "case insensitive mode" like + REG_ICASE, upper cases of the string are stored, otherwise MBS points + the same address that RAW_MBS points. 
*/ + unsigned char *mbs; +#ifdef RE_ENABLE_I18N + /* Store the wide character string which is corresponding to MBS. */ + wint_t *wcs; + int *offsets; + mbstate_t cur_state; +#endif + /* Index in RAW_MBS. Each character mbs[i] corresponds to + raw_mbs[raw_mbs_idx + i]. */ + int raw_mbs_idx; + /* The length of the valid characters in the buffers. */ + int valid_len; + /* The corresponding number of bytes in raw_mbs array. */ + int valid_raw_len; + /* The length of the buffers MBS and WCS. */ + int bufs_len; + /* The index in MBS, which is updated by re_string_fetch_byte. */ + int cur_idx; + /* length of RAW_MBS array. */ + int raw_len; + /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ + int len; + /* End of the buffer may be shorter than its length in the cases such + as re_match_2, re_search_2. Then, we use STOP for end of the buffer + instead of LEN. */ + int raw_stop; + /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ + int stop; + + /* The context of mbs[0]. We store the context independently, since + the context of mbs[0] may be different from raw_mbs[0], which is + the beginning of the input string. */ + unsigned int tip_context; + /* The translation passed as a part of an argument of re_compile_pattern. */ + RE_TRANSLATE_TYPE trans; + /* Copy of re_dfa_t's word_char. */ + re_const_bitset_ptr_t word_char; + /* 1 if REG_ICASE. 
*/ + unsigned char icase; + unsigned char is_utf8; + unsigned char map_notascii; + unsigned char mbs_allocated; + unsigned char offsets_needed; + unsigned char newline_anchor; + unsigned char word_ops_used; + int mb_cur_max; +}; +typedef struct re_string_t re_string_t; + + +struct re_dfa_t; +typedef struct re_dfa_t re_dfa_t; + +#ifndef _LIBC +# ifdef __i386__ +# define internal_function __attribute ((regparm (3), stdcall)) +# else +# define internal_function +# endif +#endif + +#ifndef NOT_IN_libc +static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, + int new_buf_len) + internal_function; +# ifdef RE_ENABLE_I18N +static void build_wcs_buffer (re_string_t *pstr) internal_function; +static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr) + internal_function; +# endif /* RE_ENABLE_I18N */ +static void build_upper_buffer (re_string_t *pstr) internal_function; +static void re_string_translate_buffer (re_string_t *pstr) internal_function; +static unsigned int re_string_context_at (const re_string_t *input, int idx, + int eflags) + internal_function __attribute ((pure)); +#endif +#define re_string_peek_byte(pstr, offset) \ + ((pstr)->mbs[(pstr)->cur_idx + offset]) +#define re_string_fetch_byte(pstr) \ + ((pstr)->mbs[(pstr)->cur_idx++]) +#define re_string_first_byte(pstr, idx) \ + ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF) +#define re_string_is_single_byte_char(pstr, idx) \ + ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \ + || (pstr)->wcs[(idx) + 1] != WEOF)) +#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) +#define re_string_cur_idx(pstr) ((pstr)->cur_idx) +#define re_string_get_buffer(pstr) ((pstr)->mbs) +#define re_string_length(pstr) ((pstr)->len) +#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) +#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) +#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) + +#ifndef _LIBC +# if HAVE_ALLOCA +# if (_MSC_VER) +# include 
<malloc.h> +# define __libc_use_alloca(n) 0 +# else +# include <alloca.h> +/* The OS usually guarantees only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + allocate anything larger than 4096 bytes. Also care for the possibility + of a few compiler-allocated temporary stack slots. */ +# define __libc_use_alloca(n) ((n) < 4032) +# endif +# else +/* alloca is implemented with malloc, so just use malloc. */ +# define __libc_use_alloca(n) 0 +# endif +#endif + +#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) +/* SunOS 4.1.x realloc doesn't accept null pointers: pre-Standard C. Sigh. */ +#define re_realloc(p,t,n) ((p != NULL) ? (t *) realloc (p,(n)*sizeof(t)) : (t *) calloc(n,sizeof(t))) +#define re_free(p) free (p) + +struct bin_tree_t +{ + struct bin_tree_t *parent; + struct bin_tree_t *left; + struct bin_tree_t *right; + struct bin_tree_t *first; + struct bin_tree_t *next; + + re_token_t token; + + /* `node_idx' is the index in dfa->nodes, if `type' == 0. + Otherwise `type' indicate the type of this node. 
*/
+  int node_idx;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+#define BIN_TREE_STORAGE_SIZE \
+  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+  struct bin_tree_storage_t *next;
+  bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+/* Context flags; each one is a distinct bit.  */
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+/* Nonzero if CONSTRAINT contains a PREV_* bit whose requirement is not
+   met by the context flags in CONTEXT.  CONSTRAINT is parenthesized in
+   every clause, like in NOT_SATISFY_NEXT_CONSTRAINT below, so that an
+   expression argument is masked as a whole.  */
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+/* Nonzero if CONSTRAINT contains a NEXT_* bit whose requirement is not
+   met by the context flags in CONTEXT.  */
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+struct re_dfastate_t
+{
+  unsigned int hash;
+  re_node_set nodes;
+  re_node_set non_eps_nodes;
+  re_node_set inveclosure;
+  re_node_set *entrance_nodes;
+  struct re_dfastate_t **trtable, **word_trtable;
+  unsigned int context : 4;
+  unsigned int halt : 1;
+  /* If this state can accept
`multi byte'. + Note that we refer to multibyte characters, and multi character + collating elements as `multi byte'. */ + unsigned int accept_mb : 1; + /* If this state has backreference node(s). */ + unsigned int has_backref : 1; + unsigned int has_constraint : 1; +}; +typedef struct re_dfastate_t re_dfastate_t; + +struct re_state_table_entry +{ + int num; + int alloc; + re_dfastate_t **array; +}; + +/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ + +typedef struct +{ + int next_idx; + int alloc; + re_dfastate_t **array; +} state_array_t; + +/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ + +typedef struct +{ + int node; + int str_idx; /* The position NODE match at. */ + state_array_t path; +} re_sub_match_last_t; + +/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. + And information about the node, whose type is OP_CLOSE_SUBEXP, + corresponding to NODE is stored in LASTS. */ + +typedef struct +{ + int str_idx; + int node; + state_array_t *path; + int alasts; /* Allocation size of LASTS. */ + int nlasts; /* The number of LASTS. */ + re_sub_match_last_t **lasts; +} re_sub_match_top_t; + +struct re_backref_cache_entry +{ + int node; + int str_idx; + int subexp_from; + int subexp_to; + char more; + char unused; + unsigned short int eps_reachable_subexps_map; +}; + +typedef struct +{ + /* The string object corresponding to the input string. */ + re_string_t input; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + const re_dfa_t *const dfa; +#else + const re_dfa_t *dfa; +#endif + /* EFLAGS of the argument of regexec. */ + int eflags; + /* Where the matching ends. */ + int match_last; + int last_node; + /* The state log used by the matcher. */ + re_dfastate_t **state_log; + int state_log_top; + /* Back reference cache. 
*/ + int nbkref_ents; + int abkref_ents; + struct re_backref_cache_entry *bkref_ents; + int max_mb_elem_len; + int nsub_tops; + int asub_tops; + re_sub_match_top_t **sub_tops; +} re_match_context_t; + +typedef struct +{ + re_dfastate_t **sifted_states; + re_dfastate_t **limited_states; + int last_node; + int last_str_idx; + re_node_set limits; +} re_sift_context_t; + +struct re_fail_stack_ent_t +{ + int idx; + int node; + regmatch_t *regs; + re_node_set eps_via_nodes; +}; + +struct re_fail_stack_t +{ + int num; + int alloc; + struct re_fail_stack_ent_t *stack; +}; + +struct re_dfa_t +{ + re_token_t *nodes; + size_t nodes_alloc; + size_t nodes_len; + int *nexts; + int *org_indices; + re_node_set *edests; + re_node_set *eclosures; + re_node_set *inveclosures; + struct re_state_table_entry *state_table; + re_dfastate_t *init_state; + re_dfastate_t *init_state_word; + re_dfastate_t *init_state_nl; + re_dfastate_t *init_state_begbuf; + bin_tree_t *str_tree; + bin_tree_storage_t *str_tree_storage; + re_bitset_ptr_t sb_char; + int str_tree_storage_idx; + + /* number of subexpressions `re_nsub' is in regex_t. */ + unsigned int state_hash_mask; + int init_node; + int nbackref; /* The number of backreference in this dfa. */ + + /* Bitmap expressing which backreference is used. */ + bitset_word_t used_bkref_map; + bitset_word_t completed_bkref_map; + + unsigned int has_plural_match : 1; + /* If this dfa has "multibyte node", which is a backreference or + a node which can accept multibyte character or multi character + collating element. 
*/
+  unsigned int has_mb_node : 1;
+  unsigned int is_utf8 : 1;
+  unsigned int map_notascii : 1;
+  unsigned int word_ops_used : 1;
+  int mb_cur_max;
+  bitset_t word_char;
+  reg_syntax_t syntax;
+  int *subexp_map;
+#ifdef DEBUG
+  char* re_str;
+#endif
+#if defined _LIBC
+  __libc_lock_define (, lock)
+#endif
+};
+
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+#define re_node_set_remove(set,id) \
+  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+#define re_node_set_empty(p) ((p)->nelem = 0)
+#define re_node_set_free(set) re_free ((set)->elems)
+
+
+typedef enum
+{
+  SB_CHAR,
+  MB_CHAR,
+  EQUIV_CLASS,
+  COLL_SYM,
+  CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+  bracket_elem_type type;
+  union
+  {
+    unsigned char ch;
+    unsigned char *name;
+    wchar_t wch;
+  } opr;
+} bracket_elem_t;
+
+
+/* Word-by-word helpers operating on all BITSET_WORDS words of a bitset.  */
+
+/* Complement every word of SET in place.  */
+static inline void
+bitset_not (bitset_t set)
+{
+  int word = 0;
+  while (word < BITSET_WORDS)
+    {
+      set[word] = ~set[word];
+      ++word;
+    }
+}
+
+/* OR every word of SRC into the matching word of DEST.  */
+static inline void
+bitset_merge (bitset_t dest, const bitset_t src)
+{
+  int word = 0;
+  while (word < BITSET_WORDS)
+    {
+      dest[word] |= src[word];
+      ++word;
+    }
+}
+
+/* AND every word of DEST with the matching word of SRC.  */
+static inline void
+bitset_mask (bitset_t dest, const bitset_t src)
+{
+  int word = 0;
+  while (word < BITSET_WORDS)
+    {
+      dest[word] &= src[word];
+      ++word;
+    }
+}
+
+#ifdef RE_ENABLE_I18N
+/* Inline functions for re_string.
*/
+/* Return how many consecutive slots, starting at IDX, belong to one
+   character: the slot at IDX plus every following slot whose wcs entry
+   is WEOF, never reaching valid_len.  Always 1 when mb_cur_max is 1.  */
+static inline int
+internal_function __attribute ((pure))
+re_string_char_size_at (const re_string_t *pstr, int idx)
+{
+  int span = 1;
+
+  if (pstr->mb_cur_max == 1)
+    return 1;
+  while (idx + span < pstr->valid_len && pstr->wcs[idx + span] == WEOF)
+    ++span;
+  return span;
+}
+
+/* Return the element at IDX as a wint_t, read from mbs when mb_cur_max
+   is 1 and from wcs otherwise.  */
+static inline wint_t
+internal_function __attribute ((pure))
+re_string_wchar_at (const re_string_t *pstr, int idx)
+{
+  wint_t wc;
+
+  if (pstr->mb_cur_max != 1)
+    wc = (wint_t) pstr->wcs[idx];
+  else
+    wc = (wint_t) pstr->mbs[idx];
+  return wc;
+}
+
+# ifndef NOT_IN_libc
+/* Outside _LIBC this always returns 1.  Inside _LIBC, when the collation
+   rule count is nonzero, it returns the number of bytes findidx advanced
+   the pointer that started at mbs + IDX.  */
+static int
+internal_function __attribute ((pure))
+re_string_elem_size_at (const re_string_t *pstr, int idx)
+{
+# ifdef _LIBC
+  const unsigned char *p, *extra;
+  const int32_t *table, *indirect;
+  int32_t tmp;
+#  include <locale/weight.h>
+  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+  if (nrules != 0)
+    {
+      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+      extra = (const unsigned char *)
+        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                                _NL_COLLATE_INDIRECTMB);
+      p = pstr->mbs + idx;
+      tmp = findidx (&p);
+      return p - pstr->mbs - idx;
+    }
+  else
+# endif /* _LIBC */
+    return 1;
+}
+# endif
+#endif /* RE_ENABLE_I18N */
+
+#endif /* _REGEX_INTERNAL_H */
diff --git a/compat/regex/regexec.c b/compat/regex/regexec.c
new file mode 100644
index 0000000000..0194965c5d
--- /dev/null
+++ b/compat/regex/regexec.c
@@ -0,0 +1,4369 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 2002-2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA. */ + +static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, + int n) internal_function; +static void match_ctx_clean (re_match_context_t *mctx) internal_function; +static void match_ctx_free (re_match_context_t *cache) internal_function; +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, + int str_idx, int from, int to) + internal_function; +static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) + internal_function; +static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, + int str_idx) internal_function; +static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, + int node, int str_idx) + internal_function; +static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, + int last_str_idx) + internal_function; +static reg_errcode_t re_search_internal (const regex_t *preg, + const char *string, int length, + int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags); +static int re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, + int start, int range, struct 
re_registers *regs, + int stop, int ret_len); +static int re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, struct re_registers *regs, + int ret_len); +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + int nregs, int regs_allocated); +static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx); +static int check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) internal_function; +static int check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) + internal_function; +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, + int cur_idx, int nmatch) internal_function; +static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, + int str_idx, int dest_node, int nregs, + regmatch_t *regs, + re_node_set *eps_via_nodes) + internal_function; +static reg_errcode_t set_regs (const regex_t *preg, + const re_match_context_t *mctx, + size_t nmatch, regmatch_t *pmatch, + int fl_backtrack) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; + +#ifdef RE_ENABLE_I18N +static int sift_states_iter_mb (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, + re_node_set *dest_nodes) + internal_function; +static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, + re_node_set 
*dest_nodes, + const re_node_set *candidates) + internal_function; +static int check_dst_limits (const re_match_context_t *mctx, + re_node_set *limits, + int dst_node, int dst_idx, int src_node, + int src_idx) internal_function; +static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, + int boundaries, int subexp_idx, + int from_node, int bkref_idx) + internal_function; +static int check_dst_limits_calc_pos (const re_match_context_t *mctx, + int limit, int subexp_idx, + int node, int str_idx, + int bkref_idx) internal_function; +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, + re_node_set *dest_nodes, + const re_node_set *candidates, + re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, + int str_idx) internal_function; +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, + re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, int num) + internal_function; +static re_dfastate_t *find_recover_state (reg_errcode_t *err, + re_match_context_t *mctx) internal_function; +static re_dfastate_t *transit_state (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *state) internal_function; +static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *next_state) + internal_function; +static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, + re_node_set *cur_nodes, + int str_idx) internal_function; +#if 0 +static re_dfastate_t *transit_state_sb (reg_errcode_t *err, + re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif +#ifdef RE_ENABLE_I18N +static reg_errcode_t transit_state_mb (re_match_context_t *mctx, + re_dfastate_t *pstate) + internal_function; +#endif /* RE_ENABLE_I18N */ +static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, + const 
re_node_set *nodes) + internal_function; +static reg_errcode_t get_subexp (re_match_context_t *mctx, + int bkref_node, int bkref_str_idx) + internal_function; +static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, + const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, + int bkref_node, int bkref_str) + internal_function; +static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) internal_function; +static reg_errcode_t check_arrival (re_match_context_t *mctx, + state_array_t *path, int top_node, + int top_str, int last_node, int last_str, + int type) internal_function; +static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, + int str_idx, + re_node_set *cur_nodes, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, + re_node_set *cur_nodes, + int ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, + re_node_set *dst_nodes, + int target, int ex_subexp, + int type) internal_function; +static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, + re_node_set *cur_nodes, int cur_str, + int subexp_num, int type) + internal_function; +static int build_trtable (const re_dfa_t *dfa, + re_dfastate_t *state) internal_function; +#ifdef RE_ENABLE_I18N +static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int idx) + internal_function; +# ifdef _LIBC +static unsigned int find_collation_sequence_value (const unsigned char *mbs, + size_t name_len) + internal_function; +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ +static int group_nodes_into_DFAstates (const re_dfa_t *dfa, + const re_dfastate_t *state, + re_node_set *states_node, + bitset_t *states_ch) internal_function; +static int check_node_accept (const re_match_context_t *mctx, + const re_token_t *node, int idx) + internal_function; +static reg_errcode_t 
extend_buffers (re_match_context_t *mctx)
+     internal_function;
+
+/* Entry point for POSIX code.  */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+   If REG_STARTEND is set, PMATCH[0].rm_so and PMATCH[0].rm_eo are read
+   on entry and used as the start offset and the length of STRING, so
+   PMATCH must point to at least one element in that case; otherwise
+   the search starts at offset 0 and the length is strlen (STRING).
+
+   We return 0 if we find a match, REG_BADPAT if EFLAGS contains any
+   other bit, and otherwise the nonzero value of `err != REG_NOERROR'
+   (every failure of re_search_internal is reported the same way).  */
+
+int
+regexec (
+        const regex_t *__restrict preg,
+        const char *__restrict string,
+        size_t nmatch,
+        regmatch_t pmatch[],
+        int eflags)
+{
+  reg_errcode_t err;
+  int start, length;
+#ifdef _LIBC
+  /* Needed by the lock macros below, which only expand to real code
+     inside glibc; elsewhere they discard their argument.  */
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+#endif
+
+  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
+    return REG_BADPAT;
+
+  if (eflags & REG_STARTEND)
+    {
+      start = pmatch[0].rm_so;
+      length = pmatch[0].rm_eo;
+    }
+  else
+    {
+      start = 0;
+      length = strlen (string);
+    }
+
+  __libc_lock_lock (dfa->lock);
+  /* With no_sub set no match offsets are requested at all.  */
+  if (preg->no_sub)
+    err = re_search_internal (preg, string, length, start, length - start,
+                              length, 0, NULL, eflags);
+  else
+    err = re_search_internal (preg, string, length, start, length - start,
+                              length, nmatch, pmatch, eflags);
+  __libc_lock_unlock (dfa->lock);
+  return err != REG_NOERROR;
+}
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+/* Compatibility entry point: calls regexec with every EFLAGS bit other
+   than REG_NOTBOL and REG_NOTEOL cleared.  */
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+                  const char *__restrict string, size_t nmatch,
+                  regmatch_t pmatch[], int eflags)
+{
+  return regexec (preg, string, nmatch, pmatch,
+                  eflags & (REG_NOTBOL | REG_NOTEOL));
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU
code.  */

/* re_match, re_search, re_match_2, re_search_2

   The former two functions operate on STRING with length LENGTH,
   while the latter two operate on the concatenation of STRING1 and
   STRING2 with lengths LENGTH1 and LENGTH2, respectively.

   re_match() matches the compiled pattern in BUFP against the string,
   starting at index START.

   re_search() first tries matching at index START, then it tries to match
   starting from index START + 1, and so on.  The last start position tried
   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
   way as re_match().)

   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
   the first STOP characters of the concatenation of the strings should be
   considered.

   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
   and all groups are stored in REGS.  (For the "_2" variants, the offsets are
   computed relative to the concatenation, not relative to the individual
   strings.)

   On success, re_match* functions return the length of the match, re_search*
   return the position of the start of the match.  Return value -1 means no
   match was found and -2 indicates an internal error.
*/ + +int +re_match (struct re_pattern_buffer *bufp, + const char *string, + int length, + int start, + struct re_registers *regs) +{ + return re_search_stub (bufp, string, length, start, 0, length, regs, 1); +} +#ifdef _LIBC +weak_alias (__re_match, re_match) +#endif + +int +re_search (struct re_pattern_buffer *bufp, + const char *string, + int length, int start, int range, + struct re_registers *regs) +{ + return re_search_stub (bufp, string, length, start, range, length, regs, 0); +} +#ifdef _LIBC +weak_alias (__re_search, re_search) +#endif + +int +re_match_2 (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, int start, + struct re_registers *regs, int stop) +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, 0, regs, stop, 1); +} +#ifdef _LIBC +weak_alias (__re_match_2, re_match_2) +#endif + +int +re_search_2 (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, int start, + int range, struct re_registers *regs, int stop) +{ + return re_search_2_stub (bufp, string1, length1, string2, length2, + start, range, regs, stop, 0); +} +#ifdef _LIBC +weak_alias (__re_search_2, re_search_2) +#endif + +static int +re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, int length1, + const char *string2, int length2, int start, + int range, struct re_registers *regs, + int stop, int ret_len) +{ + const char *str; + int rval; + int len = length1 + length2; + int free_str = 0; + + if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) + return -2; + + /* Concatenate the strings. 
*/ + if (length2 > 0) + if (length1 > 0) + { + char *s = re_malloc (char, len); + + if (BE (s == NULL, 0)) + return -2; + memcpy (s, string1, length1); + memcpy (s + length1, string2, length2); + str = s; + free_str = 1; + } + else + str = string2; + else + str = string1; + + rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len); + if (free_str) + re_free ((char *) str); + return rval; +} + +/* The parameters have the same meaning as those of re_search. + Additional parameters: + If RET_LEN is nonzero the length of the match is returned (re_match style); + otherwise the position of the match is returned. */ + +static int +re_search_stub (struct re_pattern_buffer *bufp, + const char *string, int length, int start, + int range, int stop, + struct re_registers *regs, int ret_len) +{ + reg_errcode_t result; + regmatch_t *pmatch; + int nregs, rval; + int eflags = 0; + + /* Check for out-of-range. */ + if (BE (start < 0 || start > length, 0)) + return -1; + if (BE (start + range > length, 0)) + range = length - start; + else if (BE (start + range < 0, 0)) + range = -start; + + __libc_lock_lock (dfa->lock); + + eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; + eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; + + /* Compile fastmap if we haven't yet. */ + if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) + re_compile_fastmap (bufp); + + if (BE (bufp->no_sub, 0)) + regs = NULL; + + /* We need at least 1 register. */ + if (regs == NULL) + nregs = 1; + else if (BE (bufp->regs_allocated == REGS_FIXED && + regs->num_regs < bufp->re_nsub + 1, 0)) + { + nregs = regs->num_regs; + if (BE (nregs < 1, 0)) + { + /* Nothing can be copied to regs. 
*/ + regs = NULL; + nregs = 1; + } + } + else + nregs = bufp->re_nsub + 1; + pmatch = re_malloc (regmatch_t, nregs); + if (BE (pmatch == NULL, 0)) + { + rval = -2; + goto out; + } + + result = re_search_internal (bufp, string, length, start, range, stop, + nregs, pmatch, eflags); + + rval = 0; + + /* I hope we needn't fill ther regs with -1's when no match was found. */ + if (result != REG_NOERROR) + rval = -1; + else if (regs != NULL) + { + /* If caller wants register contents data back, copy them. */ + bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, + bufp->regs_allocated); + if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) + rval = -2; + } + + if (BE (rval == 0, 1)) + { + if (ret_len) + { + assert (pmatch[0].rm_so == start); + rval = pmatch[0].rm_eo - start; + } + else + rval = pmatch[0].rm_so; + } + re_free (pmatch); + out: + __libc_lock_unlock (dfa->lock); + return rval; +} + +static unsigned +re_copy_regs (struct re_registers *regs, + regmatch_t *pmatch, + int nregs, int regs_allocated) +{ + int rval = REGS_REALLOCATE; + int i; + int need_regs = nregs + 1; + /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + uses. */ + + /* Have the register data arrays been allocated? */ + if (regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. */ + regs->start = re_malloc (regoff_t, need_regs); + if (BE (regs->start == NULL, 0)) + return REGS_UNALLOCATED; + regs->end = re_malloc (regoff_t, need_regs); + if (BE (regs->end == NULL, 0)) + { + re_free (regs->start); + return REGS_UNALLOCATED; + } + regs->num_regs = need_regs; + } + else if (regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. 
*/ + if (BE (need_regs > regs->num_regs, 0)) + { + regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); + regoff_t *new_end; + if (BE (new_start == NULL, 0)) + return REGS_UNALLOCATED; + new_end = re_realloc (regs->end, regoff_t, need_regs); + if (BE (new_end == NULL, 0)) + { + re_free (new_start); + return REGS_UNALLOCATED; + } + regs->start = new_start; + regs->end = new_end; + regs->num_regs = need_regs; + } + } + else + { + assert (regs_allocated == REGS_FIXED); + /* This function may not be called with REGS_FIXED and nregs too big. */ + assert (regs->num_regs >= nregs); + rval = REGS_FIXED; + } + + /* Copy the regs. */ + for (i = 0; i < nregs; ++i) + { + regs->start[i] = pmatch[i].rm_so; + regs->end[i] = pmatch[i].rm_eo; + } + for ( ; i < regs->num_regs; ++i) + regs->start[i] = regs->end[i] = -1; + + return rval; +} + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (struct re_pattern_buffer *bufp, + struct re_registers *regs, + unsigned num_regs, + regoff_t *starts, + regoff_t *ends) +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t *) 0; + } +} +#ifdef _LIBC +weak_alias (__re_set_registers, re_set_registers) +#endif + +/* Entry points compatible with 4.2 BSD regex library. 
We don't define + them unless specifically requested. */ + +#if defined _REGEX_RE_COMP || defined _LIBC +int +# ifdef _LIBC +weak_function +# endif +re_exec (s) + const char *s; +{ + return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); +} +#endif /* _REGEX_RE_COMP */ + +/* Internal entry point. */ + +/* Searches for a compiled pattern PREG in the string STRING, whose + length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same + mingings with regexec. START, and RANGE have the same meanings + with re_search. + Return REG_NOERROR if we find a match, and REG_NOMATCH if not, + otherwise return the error code. + Note: We assume front end functions already check ranges. + (START + RANGE >= 0 && START + RANGE <= LENGTH) */ + +static reg_errcode_t +re_search_internal (const regex_t *preg, + const char *string, + int length, int start, int range, int stop, + size_t nmatch, regmatch_t pmatch[], + int eflags) +{ + reg_errcode_t err; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int left_lim, right_lim, incr; + int fl_longest_match, match_first, match_kind, match_last = -1; + int extra_nmatch; + int sb, ch; +#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) + re_match_context_t mctx = { .dfa = dfa }; +#else + re_match_context_t mctx; +#endif + char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate + && range && !preg->can_be_null) ? preg->fastmap : NULL; + RE_TRANSLATE_TYPE t = preg->translate; + +#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) + memset (&mctx, '\0', sizeof (re_match_context_t)); + mctx.dfa = dfa; +#endif + + extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; + nmatch -= extra_nmatch; + + /* Check if the DFA haven't been compiled. 
*/ + if (BE (preg->used == 0 || dfa->init_state == NULL + || dfa->init_state_word == NULL || dfa->init_state_nl == NULL + || dfa->init_state_begbuf == NULL, 0)) + return REG_NOMATCH; + +#ifdef DEBUG + /* We assume front-end functions already check them. */ + assert (start + range >= 0 && start + range <= length); +#endif + + /* If initial states with non-begbuf contexts have no elements, + the regex must be anchored. If preg->newline_anchor is set, + we'll never use init_state_nl, so do not check it. */ + if (dfa->init_state->nodes.nelem == 0 + && dfa->init_state_word->nodes.nelem == 0 + && (dfa->init_state_nl->nodes.nelem == 0 + || !preg->newline_anchor)) + { + if (start != 0 && start + range != 0) + return REG_NOMATCH; + start = range = 0; + } + + /* We must check the longest matching, if nmatch > 0. */ + fl_longest_match = (nmatch != 0 || dfa->nbackref); + + err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, + preg->translate, preg->syntax & RE_ICASE, dfa); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + mctx.input.stop = stop; + mctx.input.raw_stop = stop; + mctx.input.newline_anchor = preg->newline_anchor; + + err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* We will log all the DFA states through which the dfa pass, + if nmatch > 1, or this dfa has "multibyte node", which is a + back-reference or a node which can accept multibyte character or + multi character collating element. */ + if (nmatch > 1 || dfa->has_mb_node) + { + /* Avoid overflow. */ + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) + { + err = REG_ESPACE; + goto free_return; + } + + mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); + if (BE (mctx.state_log == NULL, 0)) + { + err = REG_ESPACE; + goto free_return; + } + } + else + mctx.state_log = NULL; + + match_first = start; + mctx.input.tip_context = (eflags & REG_NOTBOL) ? 
CONTEXT_BEGBUF + : CONTEXT_NEWLINE | CONTEXT_BEGBUF; + + /* Check incrementally whether of not the input string match. */ + incr = (range < 0) ? -1 : 1; + left_lim = (range < 0) ? start + range : start; + right_lim = (range < 0) ? start : start + range; + sb = dfa->mb_cur_max == 1; + match_kind = + (fastmap + ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) + | (range >= 0 ? 2 : 0) + | (t != NULL ? 1 : 0)) + : 8); + + for (;; match_first += incr) + { + err = REG_NOMATCH; + if (match_first < left_lim || right_lim < match_first) + goto free_return; + + /* Advance as rapidly as possible through the string, until we + find a plausible place to start matching. This may be done + with varying efficiency, so there are various possibilities: + only the most common of them are specialized, in order to + save on code size. We use a switch statement for speed. */ + switch (match_kind) + { + case 8: + /* No fastmap. */ + break; + + case 7: + /* Fastmap with single-byte translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[t[(unsigned char) string[match_first]]]) + ++match_first; + goto forward_match_found_start_or_reached_end; + + case 6: + /* Fastmap without translation, match forward. */ + while (BE (match_first < right_lim, 1) + && !fastmap[(unsigned char) string[match_first]]) + ++match_first; + + forward_match_found_start_or_reached_end: + if (BE (match_first == right_lim, 0)) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (!fastmap[t ? t[ch] : ch]) + goto free_return; + } + break; + + case 4: + case 5: + /* Fastmap without multi-byte translation, match backwards. */ + while (match_first >= left_lim) + { + ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (fastmap[t ? 
t[ch] : ch]) + break; + --match_first; + } + if (match_first < left_lim) + goto free_return; + break; + + default: + /* In this case, we can't determine easily the current byte, + since it might be a component byte of a multibyte + character. Then we use the constructed buffer instead. */ + for (;;) + { + /* If MATCH_FIRST is out of the valid range, reconstruct the + buffers. */ + unsigned int offset = match_first - mctx.input.raw_mbs_idx; + if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) + { + err = re_string_reconstruct (&mctx.input, match_first, + eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + offset = match_first - mctx.input.raw_mbs_idx; + } + /* If MATCH_FIRST is out of the buffer, leave it as '\0'. + Note that MATCH_FIRST must not be smaller than 0. */ + ch = (match_first >= length + ? 0 : re_string_byte_at (&mctx.input, offset)); + if (fastmap[ch]) + break; + match_first += incr; + if (match_first < left_lim || match_first > right_lim) + { + err = REG_NOMATCH; + goto free_return; + } + } + break; + } + + /* Reconstruct the buffers so that the matcher can assume that + the matching starts from the beginning of the buffer. */ + err = re_string_reconstruct (&mctx.input, match_first, eflags); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + +#ifdef RE_ENABLE_I18N + /* Don't consider this char as a possible match start if it part, + yet isn't the head, of a multibyte character. */ + if (!sb && !re_string_first_byte (&mctx.input, 0)) + continue; +#endif + + /* It seems to be appropriate one, then use the matcher. */ + /* We assume that the matching starts from 0. */ + mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; + match_last = check_matching (&mctx, fl_longest_match, + range >= 0 ? 
&match_first : NULL); + if (match_last != -1) + { + if (BE (match_last == -2, 0)) + { + err = REG_ESPACE; + goto free_return; + } + else + { + mctx.match_last = match_last; + if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) + { + re_dfastate_t *pstate = mctx.state_log[match_last]; + mctx.last_node = check_halt_state_context (&mctx, pstate, + match_last); + } + if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) + || dfa->nbackref) + { + err = prune_impossible_nodes (&mctx); + if (err == REG_NOERROR) + break; + if (BE (err != REG_NOMATCH, 0)) + goto free_return; + match_last = -1; + } + else + break; /* We found a match. */ + } + } + + match_ctx_clean (&mctx); + } + +#ifdef DEBUG + assert (match_last != -1); + assert (err == REG_NOERROR); +#endif + + /* Set pmatch[] if we need. */ + if (nmatch > 0) + { + int reg_idx; + + /* Initialize registers. */ + for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) + pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; + + /* Set the points where matching start/end. */ + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = mctx.match_last; + + if (!preg->no_sub && nmatch > 1) + { + err = set_regs (preg, &mctx, nmatch, pmatch, + dfa->has_plural_match && dfa->nbackref > 0); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* At last, add the offset to the each registers, since we slided + the buffers so that we could assume that the matching starts + from 0. */ + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so != -1) + { +#ifdef RE_ENABLE_I18N + if (BE (mctx.input.offsets_needed != 0, 0)) + { + pmatch[reg_idx].rm_so = + (pmatch[reg_idx].rm_so == mctx.input.valid_len + ? mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_so]); + pmatch[reg_idx].rm_eo = + (pmatch[reg_idx].rm_eo == mctx.input.valid_len + ? 
mctx.input.valid_raw_len + : mctx.input.offsets[pmatch[reg_idx].rm_eo]); + } +#else + assert (mctx.input.offsets_needed == 0); +#endif + pmatch[reg_idx].rm_so += match_first; + pmatch[reg_idx].rm_eo += match_first; + } + for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) + { + pmatch[nmatch + reg_idx].rm_so = -1; + pmatch[nmatch + reg_idx].rm_eo = -1; + } + + if (dfa->subexp_map) + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } + } + + free_return: + re_free (mctx.state_log); + if (dfa->nbackref) + match_ctx_free (&mctx); + re_string_destruct (&mctx.input); + return err; +} + +static reg_errcode_t +prune_impossible_nodes (re_match_context_t *mctx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int halt_node, match_last; + reg_errcode_t ret; + re_dfastate_t **sifted_states; + re_dfastate_t **lim_states = NULL; + re_sift_context_t sctx; +#ifdef DEBUG + assert (mctx->state_log != NULL); +#endif + match_last = mctx->match_last; + halt_node = mctx->last_node; + + /* Avoid overflow. 
*/ + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)) + return REG_ESPACE; + + sifted_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (sifted_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + if (dfa->nbackref) + { + lim_states = re_malloc (re_dfastate_t *, match_last + 1); + if (BE (lim_states == NULL, 0)) + { + ret = REG_ESPACE; + goto free_return; + } + while (1) + { + memset (lim_states, '\0', + sizeof (re_dfastate_t *) * (match_last + 1)); + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, + match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] != NULL || lim_states[0] != NULL) + break; + do + { + --match_last; + if (match_last < 0) + { + ret = REG_NOMATCH; + goto free_return; + } + } while (mctx->state_log[match_last] == NULL + || !mctx->state_log[match_last]->halt); + halt_node = check_halt_state_context (mctx, + mctx->state_log[match_last], + match_last); + } + ret = merge_state_array (dfa, sifted_states, lim_states, + match_last + 1); + re_free (lim_states); + lim_states = NULL; + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + } + else + { + sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); + ret = sift_states_backward (mctx, &sctx); + re_node_set_free (&sctx.limits); + if (BE (ret != REG_NOERROR, 0)) + goto free_return; + if (sifted_states[0] == NULL) + { + ret = REG_NOMATCH; + goto free_return; + } + } + re_free (mctx->state_log); + mctx->state_log = sifted_states; + sifted_states = NULL; + mctx->last_node = halt_node; + mctx->match_last = match_last; + ret = REG_NOERROR; + free_return: + re_free (sifted_states); + re_free (lim_states); + return ret; +} + +/* Acquire an initial state and return it. + We must select appropriate initial state depending on the context, + since initial states may have constraints like "\<", "^", etc.. 
*/ + +static inline re_dfastate_t * +__attribute ((always_inline)) internal_function +acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, + int idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + if (dfa->init_state->has_constraint) + { + unsigned int context; + context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return dfa->init_state_word; + else if (IS_ORDINARY_CONTEXT (context)) + return dfa->init_state; + else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_begbuf; + else if (IS_NEWLINE_CONTEXT (context)) + return dfa->init_state_nl; + else if (IS_BEGBUF_CONTEXT (context)) + { + /* It is relatively rare case, then calculate on demand. */ + return re_acquire_state_context (err, dfa, + dfa->init_state->entrance_nodes, + context); + } + else + /* Must not happen? */ + return dfa->init_state; + } + else + return dfa->init_state; +} + +/* Check whether the regular expression match input string INPUT or not, + and return the index where the matching end, return -1 if not match, + or return -2 in case of an error. + FL_LONGEST_MATCH means we want the POSIX longest matching. + If P_MATCH_FIRST is not NULL, and the match fails, it is set to the + next place where we may want to try matching. + Note that the matcher assume that the maching starts from the current + index of the buffer. */ + +static int +internal_function +check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int match = 0; + int match_last = -1; + int cur_str_idx = re_string_cur_idx (&mctx->input); + re_dfastate_t *cur_state; + int at_init_state = p_match_first != NULL; + int next_start_idx = cur_str_idx; + + err = REG_NOERROR; + cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); + /* An initial state must not be NULL (invalid). 
*/ + if (BE (cur_state == NULL, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + + if (mctx->state_log != NULL) + { + mctx->state_log[cur_str_idx] = cur_state; + + /* Check OP_OPEN_SUBEXP in the initial state in case that we use them + later. E.g. Processing back references. */ + if (BE (dfa->nbackref, 0)) + { + at_init_state = 0; + err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (cur_state->has_backref) + { + err = transit_state_bkref (mctx, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + + /* If the RE accepts NULL string. */ + if (BE (cur_state->halt, 0)) + { + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, cur_str_idx)) + { + if (!fl_longest_match) + return cur_str_idx; + else + { + match_last = cur_str_idx; + match = 1; + } + } + } + + while (!re_string_eoi (&mctx->input)) + { + re_dfastate_t *old_state = cur_state; + int next_char_idx = re_string_cur_idx (&mctx->input) + 1; + + if (BE (next_char_idx >= mctx->input.bufs_len, 0) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + { + assert (err == REG_ESPACE); + return -2; + } + } + + cur_state = transit_state (&err, mctx, cur_state); + if (mctx->state_log != NULL) + cur_state = merge_state_with_log (&err, mctx, cur_state); + + if (cur_state == NULL) + { + /* Reached the invalid state or an error. Try to recover a valid + state using the state log, if available and if we have not + already found a valid (even if not the longest) match. 
*/ + if (BE (err != REG_NOERROR, 0)) + return -2; + + if (mctx->state_log == NULL + || (match && !fl_longest_match) + || (cur_state = find_recover_state (&err, mctx)) == NULL) + break; + } + + if (BE (at_init_state, 0)) + { + if (old_state == cur_state) + next_start_idx = next_char_idx; + else + at_init_state = 0; + } + + if (cur_state->halt) + { + /* Reached a halt state. + Check the halt state can satisfy the current context. */ + if (!cur_state->has_constraint + || check_halt_state_context (mctx, cur_state, + re_string_cur_idx (&mctx->input))) + { + /* We found an appropriate halt state. */ + match_last = re_string_cur_idx (&mctx->input); + match = 1; + + /* We found a match, do not modify match_first below. */ + p_match_first = NULL; + if (!fl_longest_match) + break; + } + } + } + + if (p_match_first) + *p_match_first += next_start_idx; + + return match_last; +} + +/* Check NODE match the current context. */ + +static int +internal_function +check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) +{ + re_token_type_t type = dfa->nodes[node].type; + unsigned int constraint = dfa->nodes[node].constraint; + if (type != END_OF_RE) + return 0; + if (!constraint) + return 1; + if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) + return 0; + return 1; +} + +/* Check the halt state STATE match the current context. + Return 0 if not match, if the node, STATE has, is a halt node and + match the context, return the node. 
*/ + +static int +internal_function +check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) +{ + int i; + unsigned int context; +#ifdef DEBUG + assert (state->halt); +#endif + context = re_string_context_at (&mctx->input, idx, mctx->eflags); + for (i = 0; i < state->nodes.nelem; ++i) + if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) + return state->nodes.elems[i]; + return 0; +} + +/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA + corresponding to the DFA). + Return the destination node, and update EPS_VIA_NODES, return -1 in case + of errors. */ + +static int +internal_function +proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, + int *pidx, int node, re_node_set *eps_via_nodes, + struct re_fail_stack_t *fs) +{ + const re_dfa_t *const dfa = mctx->dfa; + int i, err; + if (IS_EPSILON_NODE (dfa->nodes[node].type)) + { + re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; + re_node_set *edests = &dfa->edests[node]; + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + /* Pick up a valid destination, or return -1 if none is found. */ + for (dest_node = -1, i = 0; i < edests->nelem; ++i) + { + int candidate = edests->elems[i]; + if (!re_node_set_contains (cur_nodes, candidate)) + continue; + if (dest_node == -1) + dest_node = candidate; + + else + { + /* In order to avoid infinite loop like "(a*)*", return the second + epsilon-transition if the first was already considered. */ + if (re_node_set_contains (eps_via_nodes, dest_node)) + return candidate; + + /* Otherwise, push the second epsilon-transition on the fail stack. */ + else if (fs != NULL + && push_fail_stack (fs, *pidx, candidate, nregs, regs, + eps_via_nodes)) + return -2; + + /* We know we are going to exit. 
*/ + break; + } + } + return dest_node; + } + else + { + int naccepted = 0; + re_token_type_t type = dfa->nodes[node].type; + +#ifdef RE_ENABLE_I18N + if (dfa->nodes[node].accept_mb) + naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); + else +#endif /* RE_ENABLE_I18N */ + if (type == OP_BACK_REF) + { + int subexp_idx = dfa->nodes[node].opr.idx + 1; + naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; + if (fs != NULL) + { + if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) + return -1; + else if (naccepted) + { + char *buf = (char *) re_string_get_buffer (&mctx->input); + if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, + naccepted) != 0) + return -1; + } + } + + if (naccepted == 0) + { + int dest_node; + err = re_node_set_insert (eps_via_nodes, node); + if (BE (err < 0, 0)) + return -2; + dest_node = dfa->edests[node].elems[0]; + if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node)) + return dest_node; + } + } + + if (naccepted != 0 + || check_node_accept (mctx, dfa->nodes + node, *pidx)) + { + int dest_node = dfa->nexts[node]; + *pidx = (naccepted == 0) ? 
*pidx + 1 : *pidx + naccepted; + if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL + || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, + dest_node))) + return -1; + re_node_set_empty (eps_via_nodes); + return dest_node; + } + } + return -1; +} + +static reg_errcode_t +internal_function +push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, + int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) +{ + reg_errcode_t err; + int num = fs->num++; + if (fs->num == fs->alloc) + { + struct re_fail_stack_ent_t *new_array; + new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) + * fs->alloc * 2)); + if (new_array == NULL) + return REG_ESPACE; + fs->alloc *= 2; + fs->stack = new_array; + } + fs->stack[num].idx = str_idx; + fs->stack[num].node = dest_node; + fs->stack[num].regs = re_malloc (regmatch_t, nregs); + if (fs->stack[num].regs == NULL) + return REG_ESPACE; + memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); + err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); + return err; +} + +static int +internal_function +pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, + regmatch_t *regs, re_node_set *eps_via_nodes) +{ + int num = --fs->num; + assert (num >= 0); + *pidx = fs->stack[num].idx; + memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); + re_node_set_free (eps_via_nodes); + re_free (fs->stack[num].regs); + *eps_via_nodes = fs->stack[num].eps_via_nodes; + return fs->stack[num].node; +} + +/* Set the positions where the subexpressions are starts/ends to registers + PMATCH. + Note: We assume that pmatch[0] is already set, and + pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. 
*/ + +static reg_errcode_t +internal_function +set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, + regmatch_t *pmatch, int fl_backtrack) +{ + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + int idx, cur_node; + re_node_set eps_via_nodes; + struct re_fail_stack_t *fs; + struct re_fail_stack_t fs_body = { 0, 2, NULL }; + regmatch_t *prev_idx_match; + int prev_idx_match_malloced = 0; + +#ifdef DEBUG + assert (nmatch > 1); + assert (mctx->state_log != NULL); +#endif + if (fl_backtrack) + { + fs = &fs_body; + fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); + if (fs->stack == NULL) + return REG_ESPACE; + } + else + fs = NULL; + + cur_node = dfa->init_node; + re_node_set_init_empty (&eps_via_nodes); + +#ifdef HAVE_ALLOCA + if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) + prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); + else +#endif + { + prev_idx_match = re_malloc (regmatch_t, nmatch); + if (prev_idx_match == NULL) + { + free_fail_stack_return (fs); + return REG_ESPACE; + } + prev_idx_match_malloced = 1; + } + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + + for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) + { + update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); + + if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) + { + int reg_idx; + if (fs) + { + for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) + break; + if (reg_idx == nmatch) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); + } + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + } + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOERROR; + } + } + + /* Proceed to next node. 
*/ + cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, + &eps_via_nodes, fs); + + if (BE (cur_node < 0, 0)) + { + if (BE (cur_node == -2, 0)) + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + free_fail_stack_return (fs); + return REG_ESPACE; + } + if (fs) + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + &eps_via_nodes); + else + { + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return REG_NOMATCH; + } + } + } + re_node_set_free (&eps_via_nodes); + if (prev_idx_match_malloced) + re_free (prev_idx_match); + return free_fail_stack_return (fs); +} + +static reg_errcode_t +internal_function +free_fail_stack_return (struct re_fail_stack_t *fs) +{ + if (fs) + { + int fs_idx; + for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) + { + re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); + re_free (fs->stack[fs_idx].regs); + } + re_free (fs->stack); + } + return REG_NOERROR; +} + +static void +internal_function +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) +{ + int type = dfa->nodes[cur_node].type; + if (type == OP_OPEN_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + + /* We are at the first node of this sub expression. */ + if (reg_num < nmatch) + { + pmatch[reg_num].rm_so = cur_idx; + pmatch[reg_num].rm_eo = -1; + } + } + else if (type == OP_CLOSE_SUBEXP) + { + int reg_num = dfa->nodes[cur_node].opr.idx + 1; + if (reg_num < nmatch) + { + /* We are at the last node of this sub expression. */ + if (pmatch[reg_num].rm_so < cur_idx) + { + pmatch[reg_num].rm_eo = cur_idx; + /* This is a non-empty match or we are not inside an optional + subexpression. Accept this right away. 
*/ + memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); + } + else + { + if (dfa->nodes[cur_node].opt_subexp + && prev_idx_match[reg_num].rm_so != -1) + /* We transited through an empty match for an optional + subexpression, like (a?)*, and this is not the subexp's + first match. Copy back the old content of the registers + so that matches of an inner subexpression are undone as + well, like in ((a?))*. */ + memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); + else + /* We completed a subexpression, but it may be part of + an optional one, so do not update PREV_IDX_MATCH. */ + pmatch[reg_num].rm_eo = cur_idx; + } + } + } +} + +/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 + and sift the nodes in each states according to the following rules. + Updated state_log will be wrote to STATE_LOG. + + Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + 1. When STR_IDX == MATCH_LAST(the last index in the state_log): + If `a' isn't the LAST_NODE and `a' can't epsilon transit to + the LAST_NODE, we throw away the node `a'. + 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts + string `s' and transit to `b': + i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw + away the node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is + thrown away, we throw away the node `a'. + 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': + i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the + node `a'. + ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, + we throw away the node `a'. 
*/ + +#define STATE_NODE_CONTAINS(state,node) \ + ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) + +static reg_errcode_t +internal_function +sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) +{ + reg_errcode_t err; + int null_cnt = 0; + int str_idx = sctx->last_str_idx; + re_node_set cur_dest; + +#ifdef DEBUG + assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); +#endif + + /* Build sifted state_log[str_idx]. It has the nodes which can epsilon + transit to the last_node and the last_node itself. */ + err = re_node_set_init_1 (&cur_dest, sctx->last_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* Then check each states in the state_log. */ + while (str_idx > 0) + { + /* Update counters. */ + null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; + if (null_cnt > mctx->max_mb_elem_len) + { + memset (sctx->sifted_states, '\0', + sizeof (re_dfastate_t *) * str_idx); + re_node_set_free (&cur_dest); + return REG_NOERROR; + } + re_node_set_empty (&cur_dest); + --str_idx; + + if (mctx->state_log[str_idx]) + { + err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + + /* Add all the nodes which satisfy the following conditions: + - It can epsilon transit to a node in CUR_DEST. + - It is in CUR_SRC. + And update state_log. 
*/ + err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + err = REG_NOERROR; + free_return: + re_node_set_free (&cur_dest); + return err; +} + +static reg_errcode_t +internal_function +build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, re_node_set *cur_dest) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; + int i; + + /* Then build the next sifted state. + We build the next sifted state on `cur_dest', and update + `sifted_states[str_idx]' with `cur_dest'. + Note: + `cur_dest' is the sifted state from `state_log[str_idx + 1]'. + `cur_src' points the node_set of the old `state_log[str_idx]' + (with the epsilon nodes pre-filtered out). */ + for (i = 0; i < cur_src->nelem; i++) + { + int prev_node = cur_src->elems[i]; + int naccepted = 0; + int ret; + +#ifdef DEBUG + re_token_type_t type = dfa->nodes[prev_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[prev_node].accept_mb) + naccepted = sift_states_iter_mb (mctx, sctx, prev_node, + str_idx, sctx->last_str_idx); +#endif /* RE_ENABLE_I18N */ + + /* We don't check backreferences here. + See update_cur_sifted_state(). */ + if (!naccepted + && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) + && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], + dfa->nexts[prev_node])) + naccepted = 1; + + if (naccepted == 0) + continue; + + if (sctx->limits.nelem) + { + int to_idx = str_idx + naccepted; + if (check_dst_limits (mctx, &sctx->limits, + dfa->nexts[prev_node], to_idx, + prev_node, str_idx)) + continue; + } + ret = re_node_set_insert (cur_dest, prev_node); + if (BE (ret == -1, 0)) + return REG_ESPACE; + } + + return REG_NOERROR; +} + +/* Helper functions. 
*/ + +static reg_errcode_t +internal_function +clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) +{ + int top = mctx->state_log_top; + + if (next_state_log_idx >= mctx->input.bufs_len + || (next_state_log_idx >= mctx->input.valid_len + && mctx->input.valid_len < mctx->input.len)) + { + reg_errcode_t err; + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (top < next_state_log_idx) + { + memset (mctx->state_log + top + 1, '\0', + sizeof (re_dfastate_t *) * (next_state_log_idx - top)); + mctx->state_log_top = next_state_log_idx; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, int num) +{ + int st_idx; + reg_errcode_t err; + for (st_idx = 0; st_idx < num; ++st_idx) + { + if (dst[st_idx] == NULL) + dst[st_idx] = src[st_idx]; + else if (src[st_idx] != NULL) + { + re_node_set merged_set; + err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, + &src[st_idx]->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); + re_node_set_free (&merged_set); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *dest_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + const re_node_set *candidates; + candidates = ((mctx->state_log[str_idx] == NULL) ? NULL + : &mctx->state_log[str_idx]->nodes); + + if (dest_nodes->nelem == 0) + sctx->sifted_states[str_idx] = NULL; + else + { + if (candidates) + { + /* At first, add the nodes which can epsilon transit to a node in + DEST_NODE. 
*/ + err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + + /* Then, check the limitations in the current sift_context. */ + if (sctx->limits.nelem) + { + err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, + mctx->bkref_ents, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + + sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (candidates && mctx->state_log[str_idx]->has_backref) + { + err = sift_states_bkref (mctx, sctx, str_idx, candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + reg_errcode_t err = REG_NOERROR; + int i; + + re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + + if (!state->inveclosure.alloc) + { + err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + for (i = 0; i < dest_nodes->nelem; i++) + { + err = re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + } + } + return re_node_set_add_intersect (dest_nodes, candidates, + &state->inveclosure); +} + +static reg_errcode_t +internal_function +sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, + const re_node_set *candidates) +{ + int ecl_idx; + reg_errcode_t err; + re_node_set *inv_eclosure = dfa->inveclosures + node; + re_node_set except_nodes; + re_node_set_init_empty (&except_nodes); + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (cur_node == node) + continue; + if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) + { + int edst1 = 
dfa->edests[cur_node].elems[0]; + int edst2 = ((dfa->edests[cur_node].nelem > 1) + ? dfa->edests[cur_node].elems[1] : -1); + if ((!re_node_set_contains (inv_eclosure, edst1) + && re_node_set_contains (dest_nodes, edst1)) + || (edst2 > 0 + && !re_node_set_contains (inv_eclosure, edst2) + && re_node_set_contains (dest_nodes, edst2))) + { + err = re_node_set_add_intersect (&except_nodes, candidates, + dfa->inveclosures + cur_node); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&except_nodes); + return err; + } + } + } + } + for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) + { + int cur_node = inv_eclosure->elems[ecl_idx]; + if (!re_node_set_contains (&except_nodes, cur_node)) + { + int idx = re_node_set_contains (dest_nodes, cur_node) - 1; + re_node_set_remove_at (dest_nodes, idx); + } + } + re_node_set_free (&except_nodes); + return REG_NOERROR; +} + +static int +internal_function +check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, + int dst_node, int dst_idx, int src_node, int src_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int lim_idx, src_pos, dst_pos; + + int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); + int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = mctx->bkref_ents + limits->elems[lim_idx]; + subexp_idx = dfa->nodes[ent->node].opr.idx; + + dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, dst_node, dst_idx, + dst_bkref_idx); + src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], + subexp_idx, src_node, src_idx, + src_bkref_idx); + + /* In case of: + <src> <dst> ( <subexp> ) + ( <subexp> ) <src> <dst> + ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */ + if (src_pos == dst_pos) + continue; /* This is unrelated limitation. 
*/ + else + return 1; + } + return 0; +} + +static int +internal_function +check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, + int subexp_idx, int from_node, int bkref_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *eclosures = dfa->eclosures + from_node; + int node_idx; + + /* Else, we are on the boundary: examine the nodes on the epsilon + closure. */ + for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) + { + int node = eclosures->elems[node_idx]; + switch (dfa->nodes[node].type) + { + case OP_BACK_REF: + if (bkref_idx != -1) + { + struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; + do + { + int dst, cpos; + + if (ent->node != node) + continue; + + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) + continue; + + /* Recurse trying to reach the OP_OPEN_SUBEXP and + OP_CLOSE_SUBEXP cases below. But, if the + destination node is the same node as the source + node, don't recurse because it would cause an + infinite loop: a regex that exhibits this behavior + is ()\1*\1* */ + dst = dfa->edests[node].elems[0]; + if (dst == from_node) + { + if (boundaries & 1) + return -1; + else /* if (boundaries & 2) */ + return 0; + } + + cpos = + check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + dst, bkref_idx); + if (cpos == -1 /* && (boundaries & 1) */) + return -1; + if (cpos == 0 && (boundaries & 2)) + return 0; + + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); + } + while (ent++->more); + } + break; + + case OP_OPEN_SUBEXP: + if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) + return -1; + break; + + case OP_CLOSE_SUBEXP: + if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) + return 0; + break; + + default: + break; + } + } + + return (boundaries & 2) ? 
1 : 0; +} + +static int +internal_function +check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, + int subexp_idx, int from_node, int str_idx, + int bkref_idx) +{ + struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; + int boundaries; + + /* If we are outside the range of the subexpression, return -1 or 1. */ + if (str_idx < lim->subexp_from) + return -1; + + if (lim->subexp_to < str_idx) + return 1; + + /* If we are within the subexpression, return 0. */ + boundaries = (str_idx == lim->subexp_from); + boundaries |= (str_idx == lim->subexp_to) << 1; + if (boundaries == 0) + return 0; + + /* Else, examine epsilon closure. */ + return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, + from_node, bkref_idx); +} + +/* Check the limitations of sub expressions LIMITS, and remove the nodes + which are against limitations from DEST_NODES. */ + +static reg_errcode_t +internal_function +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates, re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, int str_idx) +{ + reg_errcode_t err; + int node_idx, lim_idx; + + for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) + { + int subexp_idx; + struct re_backref_cache_entry *ent; + ent = bkref_ents + limits->elems[lim_idx]; + + if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) + continue; /* This is unrelated limitation. */ + + subexp_idx = dfa->nodes[ent->node].opr.idx; + if (ent->subexp_to == str_idx) + { + int ops_node = -1; + int cls_node = -1; + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_OPEN_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + ops_node = node; + else if (type == OP_CLOSE_SUBEXP + && subexp_idx == dfa->nodes[node].opr.idx) + cls_node = node; + } + + /* Check the limitation of the open subexpression. 
*/ + /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ + if (ops_node >= 0) + { + err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + + /* Check the limitation of the close subexpression. */ + if (cls_node >= 0) + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + if (!re_node_set_contains (dfa->inveclosures + node, + cls_node) + && !re_node_set_contains (dfa->eclosures + node, + cls_node)) + { + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + --node_idx; + } + } + } + else /* (ent->subexp_to != str_idx) */ + { + for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) + { + int node = dest_nodes->elems[node_idx]; + re_token_type_t type = dfa->nodes[node].type; + if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) + { + if (subexp_idx != dfa->nodes[node].opr.idx) + continue; + /* It is against this limitation. + Remove it form the current sifted state. */ + err = sub_epsilon_src_nodes (dfa, node, dest_nodes, + candidates); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + } + } + return REG_NOERROR; +} + +static reg_errcode_t +internal_function +sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int node_idx, node; + re_sift_context_t local_sctx; + int first_idx = search_cur_bkref_entry (mctx, str_idx); + + if (first_idx == -1) + return REG_NOERROR; + + local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. 
*/ + + for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) + { + int enabled_idx; + re_token_type_t type; + struct re_backref_cache_entry *entry; + node = candidates->elems[node_idx]; + type = dfa->nodes[node].type; + /* Avoid infinite loop for the REs like "()\1+". */ + if (node == sctx->last_node && str_idx == sctx->last_str_idx) + continue; + if (type != OP_BACK_REF) + continue; + + entry = mctx->bkref_ents + first_idx; + enabled_idx = first_idx; + do + { + int subexp_len; + int to_idx; + int dst_node; + int ret; + re_dfastate_t *cur_state; + + if (entry->node != node) + continue; + subexp_len = entry->subexp_to - entry->subexp_from; + to_idx = str_idx + subexp_len; + dst_node = (subexp_len ? dfa->nexts[node] + : dfa->edests[node].elems[0]); + + if (to_idx > sctx->last_str_idx + || sctx->sifted_states[to_idx] == NULL + || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) + || check_dst_limits (mctx, &sctx->limits, node, + str_idx, dst_node, to_idx)) + continue; + + if (local_sctx.sifted_states == NULL) + { + local_sctx = *sctx; + err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.last_node = node; + local_sctx.last_str_idx = str_idx; + ret = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (ret < 0, 0)) + { + err = REG_ESPACE; + goto free_return; + } + cur_state = local_sctx.sifted_states[str_idx]; + err = sift_states_backward (mctx, &local_sctx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + if (sctx->limited_states != NULL) + { + err = merge_state_array (dfa, sctx->limited_states, + local_sctx.sifted_states, + str_idx + 1); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + local_sctx.sifted_states[str_idx] = cur_state; + re_node_set_remove (&local_sctx.limits, enabled_idx); + + /* mctx->bkref_ents may have changed, reload the pointer. 
*/ + entry = mctx->bkref_ents + enabled_idx; + } + while (enabled_idx++, entry++->more); + } + err = REG_NOERROR; + free_return: + if (local_sctx.sifted_states != NULL) + { + re_node_set_free (&local_sctx.limits); + } + + return err; +} + + +#ifdef RE_ENABLE_I18N +static int +internal_function +sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int naccepted; + /* Check the node can accept `multi byte'. */ + naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); + if (naccepted > 0 && str_idx + naccepted <= max_str_idx && + !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], + dfa->nexts[node_idx])) + /* The node can't accept the `multi byte', or the + destination was already thrown away, then the node + could't accept the current input `multi byte'. */ + naccepted = 0; + /* Otherwise, it is sure that the node could accept + `naccepted' bytes input. */ + return naccepted; +} +#endif /* RE_ENABLE_I18N */ + + +/* Functions for state transition. */ + +/* Return the next state to which the current state STATE will transit by + accepting the current input byte, and update STATE_LOG if necessary. + If STATE can accept a multibyte char/collating element/back reference + update the destination of STATE_LOG. */ + +static re_dfastate_t * +internal_function +transit_state (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + re_dfastate_t **trtable; + unsigned char ch; + +#ifdef RE_ENABLE_I18N + /* If the current state can accept multibyte. */ + if (BE (state->accept_mb, 0)) + { + *err = transit_state_mb (mctx, state); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } +#endif /* RE_ENABLE_I18N */ + + /* Then decide the next state with the single byte. 
*/ +#if 0 + if (0) + /* don't use transition table */ + return transit_state_sb (err, mctx, state); +#endif + + /* Use transition table */ + ch = re_string_fetch_byte (&mctx->input); + for (;;) + { + trtable = state->trtable; + if (BE (trtable != NULL, 1)) + return trtable[ch]; + + trtable = state->word_trtable; + if (BE (trtable != NULL, 1)) + { + unsigned int context; + context + = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + if (IS_WORD_CONTEXT (context)) + return trtable[ch + SBC_MAX]; + else + return trtable[ch]; + } + + if (!build_trtable (mctx->dfa, state)) + { + *err = REG_ESPACE; + return NULL; + } + + /* Retry, we now have a transition table. */ + } +} + +/* Update the state_log if we need */ +re_dfastate_t * +internal_function +merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *next_state) +{ + const re_dfa_t *const dfa = mctx->dfa; + int cur_idx = re_string_cur_idx (&mctx->input); + + if (cur_idx > mctx->state_log_top) + { + mctx->state_log[cur_idx] = next_state; + mctx->state_log_top = cur_idx; + } + else if (mctx->state_log[cur_idx] == 0) + { + mctx->state_log[cur_idx] = next_state; + } + else + { + re_dfastate_t *pstate; + unsigned int context; + re_node_set next_nodes, *log_nodes, *table_nodes = NULL; + /* If (state_log[cur_idx] != 0), it implies that cur_idx is + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ + pstate = mctx->state_log[cur_idx]; + log_nodes = pstate->entrance_nodes; + if (next_state != NULL) + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, + log_nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + } + else + next_nodes = *log_nodes; + /* Note: We already add the nodes of the initial state, + then we don't need to add them here. 
*/ + + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input) - 1, + mctx->eflags); + next_state = mctx->state_log[cur_idx] + = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR is already set. */ + + if (table_nodes != NULL) + re_node_set_free (&next_nodes); + } + + if (BE (dfa->nbackref, 0) && next_state != NULL) + { + /* Check OP_OPEN_SUBEXP in the current state in case that we use them + later. We must check them here, since the back references in the + next state might use them. */ + *err = check_subexp_matching_top (mctx, &next_state->nodes, + cur_idx); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + + /* If the next state has back references. */ + if (next_state->has_backref) + { + *err = transit_state_bkref (mctx, &next_state->nodes); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + next_state = mctx->state_log[cur_idx]; + } + } + + return next_state; +} + +/* Skip bytes in the input that correspond to part of a + multi-byte match, then look in the log for a state + from which to restart matching. */ +re_dfastate_t * +internal_function +find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) +{ + re_dfastate_t *cur_state; + do + { + int max = mctx->state_log_top; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + do + { + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); + } + while (mctx->state_log[cur_str_idx] == NULL); + + cur_state = merge_state_with_log (err, mctx, NULL); + } + while (*err == REG_NOERROR && cur_state == NULL); + return cur_state; +} + +/* Helper functions for transit_state. */ + +/* From the node set CUR_NODES, pick up the nodes whose types are + OP_OPEN_SUBEXP and which have corresponding back references in the regular + expression. And register them to use them later for evaluating the + correspoding back references. 
*/ + +static reg_errcode_t +internal_function +check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, + int str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int node_idx; + reg_errcode_t err; + + /* TODO: This isn't efficient. + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) + { + int node = cur_nodes->elems[node_idx]; + if (dfa->nodes[node].type == OP_OPEN_SUBEXP + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS + && (dfa->used_bkref_map + & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) + { + err = match_ctx_add_subtop (mctx, node, str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + } + } + return REG_NOERROR; +} + +#if 0 +/* Return the next state to which the current state STATE will transit by + accepting the current input byte. */ + +static re_dfastate_t * +transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) +{ + const re_dfa_t *const dfa = mctx->dfa; + re_node_set next_nodes; + re_dfastate_t *next_state; + int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); + unsigned int context; + + *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); + if (BE (*err != REG_NOERROR, 0)) + return NULL; + for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) + { + int cur_node = state->nodes.elems[node_cnt]; + if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) + { + *err = re_node_set_merge (&next_nodes, + dfa->eclosures + dfa->nexts[cur_node]); + if (BE (*err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return NULL; + } + } + } + context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); + next_state = re_acquire_state_context (err, dfa, &next_nodes, context); + /* We don't need to check errors here, since the return value of + this function is next_state and ERR 
is already set. */ + + re_node_set_free (&next_nodes); + re_string_skip_bytes (&mctx->input, 1); + return next_state; +} +#endif + +#ifdef RE_ENABLE_I18N +static reg_errcode_t +internal_function +transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + + for (i = 0; i < pstate->nodes.nelem; ++i) + { + re_node_set dest_nodes, *new_nodes; + int cur_node_idx = pstate->nodes.elems[i]; + int naccepted, dest_idx; + unsigned int context; + re_dfastate_t *dest_state; + + if (!dfa->nodes[cur_node_idx].accept_mb) + continue; + + if (dfa->nodes[cur_node_idx].constraint) + { + context = re_string_context_at (&mctx->input, + re_string_cur_idx (&mctx->input), + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, + context)) + continue; + } + + /* How many bytes the node can accept? */ + naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, + re_string_cur_idx (&mctx->input)); + if (naccepted == 0) + continue; + + /* The node can accepts `naccepted' bytes. */ + dest_idx = re_string_cur_idx (&mctx->input) + naccepted; + mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? 
naccepted + : mctx->max_mb_elem_len); + err = clean_state_log_if_needed (mctx, dest_idx); + if (BE (err != REG_NOERROR, 0)) + return err; +#ifdef DEBUG + assert (dfa->nexts[cur_node_idx] != -1); +#endif + new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; + + dest_state = mctx->state_log[dest_idx]; + if (dest_state == NULL) + dest_nodes = *new_nodes; + else + { + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, new_nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + context = re_string_context_at (&mctx->input, dest_idx - 1, + mctx->eflags); + mctx->state_log[dest_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + if (dest_state != NULL) + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) + return err; + } + return REG_NOERROR; +} +#endif /* RE_ENABLE_I18N */ + +static reg_errcode_t +internal_function +transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int i; + int cur_str_idx = re_string_cur_idx (&mctx->input); + + for (i = 0; i < nodes->nelem; ++i) + { + int dest_str_idx, prev_nelem, bkc_idx; + int node_idx = nodes->elems[i]; + unsigned int context; + const re_token_t *node = dfa->nodes + node_idx; + re_node_set *new_dest_nodes; + + /* Check whether `node' is a backreference or not. */ + if (node->type != OP_BACK_REF) + continue; + + if (node->constraint) + { + context = re_string_context_at (&mctx->input, cur_str_idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + continue; + } + + /* `node' is a backreference. + Check the substring which the substring matched. */ + bkc_idx = mctx->nbkref_ents; + err = get_subexp (mctx, node_idx, cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + + /* And add the epsilon closures (which is `new_dest_nodes') of + the backreference to appropriate state_log. 
*/ +#ifdef DEBUG + assert (dfa->nexts[node_idx] != -1); +#endif + for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) + { + int subexp_len; + re_dfastate_t *dest_state; + struct re_backref_cache_entry *bkref_ent; + bkref_ent = mctx->bkref_ents + bkc_idx; + if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) + continue; + subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; + new_dest_nodes = (subexp_len == 0 + ? dfa->eclosures + dfa->edests[node_idx].elems[0] + : dfa->eclosures + dfa->nexts[node_idx]); + dest_str_idx = (cur_str_idx + bkref_ent->subexp_to + - bkref_ent->subexp_from); + context = re_string_context_at (&mctx->input, dest_str_idx - 1, + mctx->eflags); + dest_state = mctx->state_log[dest_str_idx]; + prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 + : mctx->state_log[cur_str_idx]->nodes.nelem); + /* Add `new_dest_node' to state_log. */ + if (dest_state == NULL) + { + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, new_dest_nodes, + context); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + else + { + re_node_set dest_nodes; + err = re_node_set_init_union (&dest_nodes, + dest_state->entrance_nodes, + new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&dest_nodes); + goto free_return; + } + mctx->state_log[dest_str_idx] + = re_acquire_state_context (&err, dfa, &dest_nodes, context); + re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_str_idx] == NULL + && err != REG_NOERROR, 0)) + goto free_return; + } + /* We need to check recursively if the backreference can epsilon + transit. 
*/ + if (subexp_len == 0 + && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) + { + err = check_subexp_matching_top (mctx, new_dest_nodes, + cur_str_idx); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + err = transit_state_bkref (mctx, new_dest_nodes); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + } + } + err = REG_NOERROR; + free_return: + return err; +} + +/* Enumerate all the candidates which the backreference BKREF_NODE can match + at BKREF_STR_IDX, and register them by match_ctx_add_entry(). + Note that we might collect inappropriate candidates here. + However, the cost of checking them strictly here is too high, then we + delay these checking for prune_impossible_nodes(). */ + +static reg_errcode_t +internal_function +get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) +{ + const re_dfa_t *const dfa = mctx->dfa; + int subexp_num, sub_top_idx; + const char *buf = (const char *) re_string_get_buffer (&mctx->input); + /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ + int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); + if (cache_idx != -1) + { + const struct re_backref_cache_entry *entry + = mctx->bkref_ents + cache_idx; + do + if (entry->node == bkref_node) + return REG_NOERROR; /* We already checked it. */ + while (entry++->more); + } + + subexp_num = dfa->nodes[bkref_node].opr.idx; + + /* For each sub expression */ + for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) + { + reg_errcode_t err; + re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; + re_sub_match_last_t *sub_last; + int sub_last_idx, sl_str, bkref_str_off; + + if (dfa->nodes[sub_top->node].opr.idx != subexp_num) + continue; /* It isn't related. */ + + sl_str = sub_top->str_idx; + bkref_str_off = bkref_str_idx; + /* At first, check the last node of sub expressions we already + evaluated. 
*/ + for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) + { + int sl_str_diff; + sub_last = sub_top->lasts[sub_last_idx]; + sl_str_diff = sub_last->str_idx - sl_str; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_diff > 0) + { + if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) + { + /* Not enough chars for a successful match. */ + if (bkref_str_off + sl_str_diff > mctx->input.len) + break; + + err = clean_state_log_if_needed (mctx, + bkref_str_off + + sl_str_diff); + if (BE (err != REG_NOERROR, 0)) + return err; + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) + /* We don't need to search this sub expression any more. */ + break; + } + bkref_str_off += sl_str_diff; + sl_str += sl_str_diff; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + + /* Reload buf, since the preceding call might have reallocated + the buffer. */ + buf = (const char *) re_string_get_buffer (&mctx->input); + + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + } + + if (sub_last_idx < sub_top->nlasts) + continue; + if (sub_last_idx > 0) + ++sl_str; + /* Then, search for the other last nodes of the sub expression. */ + for (; sl_str <= bkref_str_idx; ++sl_str) + { + int cls_node, sl_str_off; + const re_node_set *nodes; + sl_str_off = sl_str - sub_top->str_idx; + /* The matched string by the sub expression match with the substring + at the back reference? */ + if (sl_str_off > 0) + { + if (BE (bkref_str_off >= mctx->input.valid_len, 0)) + { + /* If we are at the end of the input, we cannot match. 
*/ + if (bkref_str_off >= mctx->input.len) + break; + + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) + return err; + + buf = (const char *) re_string_get_buffer (&mctx->input); + } + if (buf [bkref_str_off++] != buf[sl_str - 1]) + break; /* We don't need to search this sub expression + any more. */ + } + if (mctx->state_log[sl_str] == NULL) + continue; + /* Does this state have a ')' of the sub expression? */ + nodes = &mctx->state_log[sl_str]->nodes; + cls_node = find_subexp_node (dfa, nodes, subexp_num, + OP_CLOSE_SUBEXP); + if (cls_node == -1) + continue; /* No. */ + if (sub_top->path == NULL) + { + sub_top->path = calloc (sizeof (state_array_t), + sl_str - sub_top->str_idx + 1); + if (sub_top->path == NULL) + return REG_ESPACE; + } + /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node + in the current context? */ + err = check_arrival (mctx, sub_top->path, sub_top->node, + sub_top->str_idx, cls_node, sl_str, + OP_CLOSE_SUBEXP); + if (err == REG_NOMATCH) + continue; + if (BE (err != REG_NOERROR, 0)) + return err; + sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); + if (BE (sub_last == NULL, 0)) + return REG_ESPACE; + err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, + bkref_str_idx); + if (err == REG_NOMATCH) + continue; + } + } + return REG_NOERROR; +} + +/* Helper functions for get_subexp(). */ + +/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. + If it can arrive, register the sub expression expressed with SUB_TOP + and SUB_LAST. */ + +static reg_errcode_t +internal_function +get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, int bkref_node, int bkref_str) +{ + reg_errcode_t err; + int to_idx; + /* Can the subexpression arrive the back reference? 
*/ + err = check_arrival (mctx, &sub_last->path, sub_last->node, + sub_last->str_idx, bkref_node, bkref_str, + OP_OPEN_SUBEXP); + if (err != REG_NOERROR) + return err; + err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, + sub_last->str_idx); + if (BE (err != REG_NOERROR, 0)) + return err; + to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; + return clean_state_log_if_needed (mctx, to_idx); +} + +/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. + Search '(' if FL_OPEN, or search ')' otherwise. + TODO: This function isn't efficient... + Because there might be more than one nodes whose types are + OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all + nodes. + E.g. RE: (a){2} */ + +static int +internal_function +find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) +{ + int cls_idx; + for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) + { + int cls_node = nodes->elems[cls_idx]; + const re_token_t *node = dfa->nodes + cls_node; + if (node->type == type + && node->opr.idx == subexp_idx) + return cls_node; + } + return -1; +} + +/* Check whether the node TOP_NODE at TOP_STR can arrive to the node + LAST_NODE at LAST_STR. We record the path onto PATH since it will be + heavily reused. + Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ + +static reg_errcode_t +internal_function +check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, + int top_str, int last_node, int last_str, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; + int subexp_num, backup_cur_idx, str_idx, null_cnt; + re_dfastate_t *cur_state = NULL; + re_node_set *cur_nodes, next_nodes; + re_dfastate_t **backup_state_log; + unsigned int context; + + subexp_num = dfa->nodes[top_node].opr.idx; + /* Extend the buffer if we need. 
*/ + if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) + { + re_dfastate_t **new_array; + int old_alloc = path->alloc; + path->alloc += last_str + mctx->max_mb_elem_len + 1; + new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); + if (BE (new_array == NULL, 0)) + { + path->alloc = old_alloc; + return REG_ESPACE; + } + path->array = new_array; + memset (new_array + old_alloc, '\0', + sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); + } + + str_idx = path->next_idx ? path->next_idx : top_str; + + /* Temporary modify MCTX. */ + backup_state_log = mctx->state_log; + backup_cur_idx = mctx->input.cur_idx; + mctx->state_log = path->array; + mctx->input.cur_idx = str_idx; + + /* Setup initial node set. */ + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + if (str_idx == top_str) + { + err = re_node_set_init_1 (&next_nodes, top_node); + if (BE (err != REG_NOERROR, 0)) + return err; + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + else + { + cur_state = mctx->state_log[str_idx]; + if (cur_state && cur_state->has_backref) + { + err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); + if (BE (err != REG_NOERROR, 0)) + return err; + } + else + re_node_set_init_empty (&next_nodes); + } + if (str_idx == top_str || (cur_state && cur_state->has_backref)) + { + if (next_nodes.nelem) + { + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + } + + for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) + { + re_node_set_empty (&next_nodes); + 
if (mctx->state_log[str_idx + 1]) + { + err = re_node_set_merge (&next_nodes, + &mctx->state_log[str_idx + 1]->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + if (cur_state) + { + err = check_arrival_add_next_nodes (mctx, str_idx, + &cur_state->non_eps_nodes, + &next_nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + ++str_idx; + if (next_nodes.nelem) + { + err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + err = expand_bkref_cache (mctx, &next_nodes, str_idx, + subexp_num, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + } + context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); + cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); + if (BE (cur_state == NULL && err != REG_NOERROR, 0)) + { + re_node_set_free (&next_nodes); + return err; + } + mctx->state_log[str_idx] = cur_state; + null_cnt = cur_state == NULL ? null_cnt + 1 : 0; + } + re_node_set_free (&next_nodes); + cur_nodes = (mctx->state_log[last_str] == NULL ? NULL + : &mctx->state_log[last_str]->nodes); + path->next_idx = str_idx; + + /* Fix MCTX. */ + mctx->state_log = backup_state_log; + mctx->input.cur_idx = backup_cur_idx; + + /* Then check the current node set has the node LAST_NODE. */ + if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) + return REG_NOERROR; + + return REG_NOMATCH; +} + +/* Helper functions for check_arrival. */ + +/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them + to NEXT_NODES. + TODO: This function is similar to the functions transit_state*(), + however this function has many additional works. + Can't we unify them? 
*/ + +static reg_errcode_t +internal_function +check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, + re_node_set *cur_nodes, re_node_set *next_nodes) +{ + const re_dfa_t *const dfa = mctx->dfa; + int result; + int cur_idx; +#ifdef RE_ENABLE_I18N + reg_errcode_t err = REG_NOERROR; +#endif + re_node_set union_set; + re_node_set_init_empty (&union_set); + for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) + { + int naccepted = 0; + int cur_node = cur_nodes->elems[cur_idx]; +#ifdef DEBUG + re_token_type_t type = dfa->nodes[cur_node].type; + assert (!IS_EPSILON_NODE (type)); +#endif +#ifdef RE_ENABLE_I18N + /* If the node may accept `multi byte'. */ + if (dfa->nodes[cur_node].accept_mb) + { + naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, + str_idx); + if (naccepted > 1) + { + re_dfastate_t *dest_state; + int next_node = dfa->nexts[cur_node]; + int next_idx = str_idx + naccepted; + dest_state = mctx->state_log[next_idx]; + re_node_set_empty (&union_set); + if (dest_state) + { + err = re_node_set_merge (&union_set, &dest_state->nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + result = re_node_set_insert (&union_set, next_node); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + mctx->state_log[next_idx] = re_acquire_state (&err, dfa, + &union_set); + if (BE (mctx->state_log[next_idx] == NULL + && err != REG_NOERROR, 0)) + { + re_node_set_free (&union_set); + return err; + } + } + } +#endif /* RE_ENABLE_I18N */ + if (naccepted + || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) + { + result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); + if (BE (result < 0, 0)) + { + re_node_set_free (&union_set); + return REG_ESPACE; + } + } + } + re_node_set_free (&union_set); + return REG_NOERROR; +} + +/* For all the nodes in CUR_NODES, add the epsilon closures of them to + CUR_NODES, however exclude the nodes which are: + - 
inside the sub expression whose number is EX_SUBEXP, if FL_OPEN. + - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. +*/ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, + int ex_subexp, int type) +{ + reg_errcode_t err; + int idx, outside_node; + re_node_set new_nodes; +#ifdef DEBUG + assert (cur_nodes->nelem); +#endif + err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); + if (BE (err != REG_NOERROR, 0)) + return err; + /* Create a new node set NEW_NODES with the nodes which are epsilon + closures of the node in CUR_NODES. */ + + for (idx = 0; idx < cur_nodes->nelem; ++idx) + { + int cur_node = cur_nodes->elems[idx]; + const re_node_set *eclosure = dfa->eclosures + cur_node; + outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); + if (outside_node == -1) + { + /* There are no problematic nodes, just merge them. */ + err = re_node_set_merge (&new_nodes, eclosure); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + else + { + /* There are problematic nodes, re-calculate incrementally. */ + err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + { + re_node_set_free (&new_nodes); + return err; + } + } + } + re_node_set_free (cur_nodes); + *cur_nodes = new_nodes; + return REG_NOERROR; +} + +/* Helper function for check_arrival_expand_ecl. + Check incrementally the epsilon closure of TARGET, and if it isn't + problematic append it to DST_NODES. 
*/ + +static reg_errcode_t +internal_function +check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, + int target, int ex_subexp, int type) +{ + int cur_node; + for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) + { + int err; + + if (dfa->nodes[cur_node].type == type + && dfa->nodes[cur_node].opr.idx == ex_subexp) + { + if (type == OP_CLOSE_SUBEXP) + { + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + } + break; + } + err = re_node_set_insert (dst_nodes, cur_node); + if (BE (err == -1, 0)) + return REG_ESPACE; + if (dfa->edests[cur_node].nelem == 0) + break; + if (dfa->edests[cur_node].nelem == 2) + { + err = check_arrival_expand_ecl_sub (dfa, dst_nodes, + dfa->edests[cur_node].elems[1], + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + return err; + } + cur_node = dfa->edests[cur_node].elems[0]; + } + return REG_NOERROR; +} + + +/* For all the back references in the current state, calculate the + destination of the back references by the appropriate entry + in MCTX->BKREF_ENTS. */ + +static reg_errcode_t +internal_function +expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, + int cur_str, int subexp_num, int type) +{ + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err; + int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); + struct re_backref_cache_entry *ent; + + if (cache_idx_start == -1) + return REG_NOERROR; + + restart: + ent = mctx->bkref_ents + cache_idx_start; + do + { + int to_idx, next_node; + + /* Is this entry ENT is appropriate? */ + if (!re_node_set_contains (cur_nodes, ent->node)) + continue; /* No. */ + + to_idx = cur_str + ent->subexp_to - ent->subexp_from; + /* Calculate the destination of the back reference, and append it + to MCTX->STATE_LOG. */ + if (to_idx == cur_str) + { + /* The backreference did epsilon transit, we must re-check all the + node in the current state. 
*/ + re_node_set new_dests; + reg_errcode_t err2, err3; + next_node = dfa->edests[ent->node].elems[0]; + if (re_node_set_contains (cur_nodes, next_node)) + continue; + err = re_node_set_init_1 (&new_dests, next_node); + err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); + err3 = re_node_set_merge (cur_nodes, &new_dests); + re_node_set_free (&new_dests); + if (BE (err != REG_NOERROR || err2 != REG_NOERROR + || err3 != REG_NOERROR, 0)) + { + err = (err != REG_NOERROR ? err + : (err2 != REG_NOERROR ? err2 : err3)); + return err; + } + /* TODO: It is still inefficient... */ + goto restart; + } + else + { + re_node_set union_set; + next_node = dfa->nexts[ent->node]; + if (mctx->state_log[to_idx]) + { + int ret; + if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, + next_node)) + continue; + err = re_node_set_init_copy (&union_set, + &mctx->state_log[to_idx]->nodes); + ret = re_node_set_insert (&union_set, next_node); + if (BE (err != REG_NOERROR || ret < 0, 0)) + { + re_node_set_free (&union_set); + err = err != REG_NOERROR ? err : REG_ESPACE; + return err; + } + } + else + { + err = re_node_set_init_1 (&union_set, next_node); + if (BE (err != REG_NOERROR, 0)) + return err; + } + mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); + re_node_set_free (&union_set); + if (BE (mctx->state_log[to_idx] == NULL + && err != REG_NOERROR, 0)) + return err; + } + } + while (ent++->more); + return REG_NOERROR; +} + +/* Build transition table for the state. + Return 1 if succeeded, otherwise return NULL. */ + +static int +internal_function +build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) +{ + reg_errcode_t err; + int i, j, ch, need_word_trtable = 0; + bitset_word_t elem, mask; + bool dests_node_malloced = false; + bool dest_states_malloced = false; + int ndests; /* Number of the destination states from `state'. 
*/ + re_dfastate_t **trtable; + re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; + re_node_set follows, *dests_node; + bitset_t *dests_ch; + bitset_t acceptable; + + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset_t dests_ch[SBC_MAX]; + } *dests_alloc; + + /* We build DFA states which corresponds to the destination nodes + from `state'. `dests_node[i]' represents the nodes which i-th + destination state contains, and `dests_ch[i]' represents the + characters which i-th destination state accepts. */ +#ifdef HAVE_ALLOCA + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); + else +#endif + { + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) + return 0; + dests_node_malloced = true; + } + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; + + /* Initialize transiton table. */ + state->word_trtable = state->trtable = NULL; + + /* At first, group all nodes belonging to `state' into several + destinations. */ + ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); + if (BE (ndests <= 0, 0)) + { + if (dests_node_malloced) + free (dests_alloc); + /* Return 0 in case of an error, 1 otherwise. */ + if (ndests == 0) + { + state->trtable = (re_dfastate_t **) + calloc (sizeof (re_dfastate_t *), SBC_MAX); + return 1; + } + return 0; + } + + err = re_node_set_alloc (&follows, ndests + 1); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + + /* Avoid arithmetic overflow in size calculation. 
*/ + if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX) + / (3 * sizeof (re_dfastate_t *))) + < ndests), + 0)) + goto out_free; + +#ifdef HAVE_ALLOCA + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX + + ndests * 3 * sizeof (re_dfastate_t *))) + dest_states = (re_dfastate_t **) + alloca (ndests * 3 * sizeof (re_dfastate_t *)); + else +#endif + { + dest_states = (re_dfastate_t **) + malloc (ndests * 3 * sizeof (re_dfastate_t *)); + if (BE (dest_states == NULL, 0)) + { +out_free: + if (dest_states_malloced) + free (dest_states); + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + if (dests_node_malloced) + free (dests_alloc); + return 0; + } + dest_states_malloced = true; + } + dest_states_word = dest_states + ndests; + dest_states_nl = dest_states_word + ndests; + bitset_empty (acceptable); + + /* Then build the states for all destinations. */ + for (i = 0; i < ndests; ++i) + { + int next_node; + re_node_set_empty (&follows); + /* Merge the follows of this destination states. */ + for (j = 0; j < dests_node[i].nelem; ++j) + { + next_node = dfa->nexts[dests_node[i].elems[j]]; + if (next_node != -1) + { + err = re_node_set_merge (&follows, dfa->eclosures + next_node); + if (BE (err != REG_NOERROR, 0)) + goto out_free; + } + } + dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); + if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + /* If the new state has context constraint, + build appropriate states for these contexts. 
*/ + if (dest_states[i]->has_constraint) + { + dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_WORD); + if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + + if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) + need_word_trtable = 1; + + dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, + CONTEXT_NEWLINE); + if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) + goto out_free; + } + else + { + dest_states_word[i] = dest_states[i]; + dest_states_nl[i] = dest_states[i]; + } + bitset_merge (acceptable, dests_ch[i]); + } + + if (!BE (need_word_trtable, 0)) + { + /* We don't care about whether the following character is a word + character, or we are in a single-byte character set so we can + discern by looking at the character code: allocate a + 256-entry transition table. */ + trtable = state->trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + if (dfa->word_char[i] & mask) + trtable[ch] = dest_states_word[j]; + else + trtable[ch] = dest_states[j]; + } + } + else + { + /* We care about whether the following character is a word + character, and we are in a multi-byte character set: discern + by looking at the character code: build two 256-entry + transition tables, one starting at trtable[0] and one + starting at trtable[SBC_MAX]. 
*/ + trtable = state->word_trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); + if (BE (trtable == NULL, 0)) + goto out_free; + + /* For all characters ch...: */ + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; + elem; + mask <<= 1, elem >>= 1, ++ch) + if (BE (elem & 1, 0)) + { + /* There must be exactly one destination which accepts + character ch. See group_nodes_into_DFAstates. */ + for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) + ; + + /* j-th destination accepts the word character ch. */ + trtable[ch] = dest_states[j]; + trtable[ch + SBC_MAX] = dest_states_word[j]; + } + } + + /* new line */ + if (bitset_contain (acceptable, NEWLINE_CHAR)) + { + /* The current state accepts newline character. */ + for (j = 0; j < ndests; ++j) + if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) + { + /* k-th destination accepts newline character. */ + trtable[NEWLINE_CHAR] = dest_states_nl[j]; + if (need_word_trtable) + trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; + /* There must be only one destination which accepts + newline. See group_nodes_into_DFAstates. */ + break; + } + } + + if (dest_states_malloced) + free (dest_states); + + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + + if (dests_node_malloced) + free (dests_alloc); + + return 1; +} + +/* Group all nodes belonging to STATE into several destinations. + Then for all destinations, set the nodes belonging to the destination + to DESTS_NODE[i] and set the characters accepted by the destination + to DEST_CH[i]. This function return the number of destinations. */ + +static int +internal_function +group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, + re_node_set *dests_node, bitset_t *dests_ch) +{ + reg_errcode_t err; + int result; + int i, j, k; + int ndests; /* Number of the destinations from `state'. */ + bitset_t accepts; /* Characters a node can accept. 
*/ + const re_node_set *cur_nodes = &state->nodes; + bitset_empty (accepts); + ndests = 0; + + /* For all the nodes belonging to `state', */ + for (i = 0; i < cur_nodes->nelem; ++i) + { + re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; + re_token_type_t type = node->type; + unsigned int constraint = node->constraint; + + /* Enumerate all single byte character this node can accept. */ + if (type == CHARACTER) + bitset_set (accepts, node->opr.c); + else if (type == SIMPLE_BRACKET) + { + bitset_merge (accepts, node->opr.sbcset); + } + else if (type == OP_PERIOD) + { +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + bitset_merge (accepts, dfa->sb_char); + else +#endif + bitset_set_all (accepts); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#ifdef RE_ENABLE_I18N + else if (type == OP_UTF8_PERIOD) + { + memset (accepts, '\xff', sizeof (bitset_t) / 2); + if (!(dfa->syntax & RE_DOT_NEWLINE)) + bitset_clear (accepts, '\n'); + if (dfa->syntax & RE_DOT_NOT_NULL) + bitset_clear (accepts, '\0'); + } +#endif + else + continue; + + /* Check the `accepts' and sift the characters which are not + match it the context. 
*/ + if (constraint) + { + if (constraint & NEXT_NEWLINE_CONSTRAINT) + { + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bitset_empty (accepts); + if (accepts_newline) + bitset_set (accepts, NEWLINE_CHAR); + else + continue; + } + if (constraint & NEXT_ENDBUF_CONSTRAINT) + { + bitset_empty (accepts); + continue; + } + + if (constraint & NEXT_WORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && !node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= dfa->word_char[j]); + if (!any_set) + continue; + } + if (constraint & NEXT_NOTWORD_CONSTRAINT) + { + bitset_word_t any_set = 0; + if (type == CHARACTER && node->word_char) + { + bitset_empty (accepts); + continue; + } +#ifdef RE_ENABLE_I18N + if (dfa->mb_cur_max > 1) + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); + else +#endif + for (j = 0; j < BITSET_WORDS; ++j) + any_set |= (accepts[j] &= ~dfa->word_char[j]); + if (!any_set) + continue; + } + } + + /* Then divide `accepts' into DFA states, or create a new + state. Above, we make sure that accepts is not empty. */ + for (j = 0; j < ndests; ++j) + { + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; + /* Flags, see below. */ + bitset_word_t has_intersec, not_subset, not_consumed; + + /* Optimization, skip if this state doesn't accept the character. */ + if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) + continue; + + /* Enumerate the intersection set of this state and `accepts'. */ + has_intersec = 0; + for (k = 0; k < BITSET_WORDS; ++k) + has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; + /* And skip if the intersection set is empty. 
*/ + if (!has_intersec) + continue; + + /* Then check if this state is a subset of `accepts'. */ + not_subset = not_consumed = 0; + for (k = 0; k < BITSET_WORDS; ++k) + { + not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; + not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; + } + + /* If this state isn't a subset of `accepts', create a + new group state, which has the `remains'. */ + if (not_subset) + { + bitset_copy (dests_ch[ndests], remains); + bitset_copy (dests_ch[j], intersec); + err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + } + + /* Put the position in the current group. */ + result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (BE (result < 0, 0)) + goto error_return; + + /* If all characters are consumed, go to next node. */ + if (!not_consumed) + break; + } + /* Some characters remain, create a new group. */ + if (j == ndests) + { + bitset_copy (dests_ch[ndests], accepts); + err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + goto error_return; + ++ndests; + bitset_empty (accepts); + } + } + return ndests; + error_return: + for (j = 0; j < ndests; ++j) + re_node_set_free (dests_node + j); + return -1; +} + +#ifdef RE_ENABLE_I18N +/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. + Return the number of the bytes the node accepts. + STR_IDX is the current index of the input string. + + This function handles the nodes which can accept one character, or + one collating element like '.', '[a-z]', opposite to the other nodes + can only accept one byte. 
*/ + +static int +internal_function +check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int str_idx) +{ + const re_token_t *node = dfa->nodes + node_idx; + int char_len, elem_len; + int i; + wint_t wc; + + if (BE (node->type == OP_UTF8_PERIOD, 0)) + { + unsigned char c = re_string_byte_at (input, str_idx), d; + if (BE (c < 0xc2, 1)) + return 0; + + if (str_idx + 2 > input->len) + return 0; + + d = re_string_byte_at (input, str_idx + 1); + if (c < 0xe0) + return (d < 0x80 || d > 0xbf) ? 0 : 2; + else if (c < 0xf0) + { + char_len = 3; + if (c == 0xe0 && d < 0xa0) + return 0; + } + else if (c < 0xf8) + { + char_len = 4; + if (c == 0xf0 && d < 0x90) + return 0; + } + else if (c < 0xfc) + { + char_len = 5; + if (c == 0xf8 && d < 0x88) + return 0; + } + else if (c < 0xfe) + { + char_len = 6; + if (c == 0xfc && d < 0x84) + return 0; + } + else + return 0; + + if (str_idx + char_len > input->len) + return 0; + + for (i = 1; i < char_len; ++i) + { + d = re_string_byte_at (input, str_idx + i); + if (d < 0x80 || d > 0xbf) + return 0; + } + return char_len; + } + + char_len = re_string_char_size_at (input, str_idx); + if (node->type == OP_PERIOD) + { + if (char_len <= 1) + return 0; + /* FIXME: I don't think this if is needed, as both '\n' + and '\0' are char_len == 1. */ + /* '.' accepts any one character except the following two cases. 
*/ + if ((!(dfa->syntax & RE_DOT_NEWLINE) && + re_string_byte_at (input, str_idx) == '\n') || + ((dfa->syntax & RE_DOT_NOT_NULL) && + re_string_byte_at (input, str_idx) == '\0')) + return 0; + return char_len; + } + + elem_len = re_string_elem_size_at (input, str_idx); + wc = __btowc(*(input->mbs+str_idx)); + if (((elem_len <= 1 && char_len <= 1) || char_len == 0) && (wc != WEOF && wc < SBC_MAX)) + return 0; + + if (node->type == COMPLEX_BRACKET) + { + const re_charset_t *cset = node->opr.mbcset; +# ifdef _LIBC + const unsigned char *pin + = ((const unsigned char *) re_string_get_buffer (input) + str_idx); + int j; + uint32_t nrules; +# endif /* _LIBC */ + int match_len = 0; + wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) + ? re_string_wchar_at (input, str_idx) : 0); + + /* match with multibyte character? */ + for (i = 0; i < cset->nmbchars; ++i) + if (wc == cset->mbchars[i]) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + /* match with character_class? */ + for (i = 0; i < cset->nchar_classes; ++i) + { + wctype_t wt = cset->char_classes[i]; + if (__iswctype (wc, wt)) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + +# ifdef _LIBC + nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules != 0) + { + unsigned int in_collseq = 0; + const int32_t *table, *indirect; + const unsigned char *weights, *extra; + const char *collseqwc; + /* This #include defines a local function! */ +# include <locale/weight.h> + + /* match with collating_symbol? */ + if (cset->ncoll_syms) + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + for (i = 0; i < cset->ncoll_syms; ++i) + { + const unsigned char *coll_sym = extra + cset->coll_syms[i]; + /* Compare the length of input collating element and + the length of current collating element. */ + if (*coll_sym != elem_len) + continue; + /* Compare each bytes. 
*/ + for (j = 0; j < *coll_sym; j++) + if (pin[j] != coll_sym[1 + j]) + break; + if (j == *coll_sym) + { + /* Match if every bytes is equal. */ + match_len = j; + goto check_node_accept_bytes_match; + } + } + + if (cset->nranges) + { + if (elem_len <= char_len) + { + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + in_collseq = __collseq_table_lookup (collseqwc, wc); + } + else + in_collseq = find_collation_sequence_value (pin, elem_len); + } + /* match with range expression? */ + for (i = 0; i < cset->nranges; ++i) + if (cset->range_starts[i] <= in_collseq + && in_collseq <= cset->range_ends[i]) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + + /* match with equivalence_class? */ + if (cset->nequiv_classes) + { + const unsigned char *cp = pin; + table = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); + weights = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); + extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); + indirect = (const int32_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); + int32_t idx = findidx (&cp); + if (idx > 0) + for (i = 0; i < cset->nequiv_classes; ++i) + { + int32_t equiv_class_idx = cset->equiv_classes[i]; + size_t weight_len = weights[idx & 0xffffff]; + if (weight_len == weights[equiv_class_idx & 0xffffff] + && (idx >> 24) == (equiv_class_idx >> 24)) + { + int cnt = 0; + + idx &= 0xffffff; + equiv_class_idx &= 0xffffff; + + while (cnt <= weight_len + && (weights[equiv_class_idx + 1 + cnt] + == weights[idx + 1 + cnt])) + ++cnt; + if (cnt > weight_len) + { + match_len = elem_len; + goto check_node_accept_bytes_match; + } + } + } + } + } + else +# endif /* _LIBC */ + { + /* match with range expression? 
*/ +#if __GNUC__ >= 2 + wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; +#else + wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; + cmp_buf[2] = wc; +#endif + for (i = 0; i < cset->nranges; ++i) + { + cmp_buf[0] = cset->range_starts[i]; + cmp_buf[4] = cset->range_ends[i]; + if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 + && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + { + match_len = char_len; + goto check_node_accept_bytes_match; + } + } + } + check_node_accept_bytes_match: + if (!cset->non_match) + return match_len; + else + { + if (match_len > 0) + return 0; + else + return (elem_len > char_len) ? elem_len : char_len; + } + } + return 0; +} + +# ifdef _LIBC +static unsigned int +internal_function +find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) +{ + uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); + if (nrules == 0) + { + if (mbs_len == 1) + { + /* No valid character. Match it as a single byte character. */ + const unsigned char *collseq = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); + return collseq[mbs[0]]; + } + return UINT_MAX; + } + else + { + int32_t idx; + const unsigned char *extra = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); + int32_t extrasize = (const unsigned char *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; + + for (idx = 0; idx < extrasize;) + { + int mbs_cnt, found = 0; + int32_t elem_mbs_len; + /* Skip the name of collating element name. */ + idx = idx + extra[idx] + 1; + elem_mbs_len = extra[idx++]; + if (mbs_len == elem_mbs_len) + { + for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) + if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) + break; + if (mbs_cnt == elem_mbs_len) + /* Found the entry. */ + found = 1; + } + /* Skip the byte sequence of the collating element. */ + idx += elem_mbs_len; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + /* Skip the collation sequence value. 
*/ + idx += sizeof (uint32_t); + /* Skip the wide char sequence of the collating element. */ + idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + /* If we found the entry, return the sequence value. */ + if (found) + return *(uint32_t *) (extra + idx); + /* Skip the collation sequence value. */ + idx += sizeof (uint32_t); + } + return UINT_MAX; + } +} +# endif /* _LIBC */ +#endif /* RE_ENABLE_I18N */ + +/* Check whether the node accepts the byte which is IDX-th + byte of the INPUT. */ + +static int +internal_function +check_node_accept (const re_match_context_t *mctx, const re_token_t *node, + int idx) +{ + unsigned char ch; + ch = re_string_byte_at (&mctx->input, idx); + switch (node->type) + { + case CHARACTER: + if (node->opr.c != ch) + return 0; + break; + + case SIMPLE_BRACKET: + if (!bitset_contain (node->opr.sbcset, ch)) + return 0; + break; + +#ifdef RE_ENABLE_I18N + case OP_UTF8_PERIOD: + if (ch >= 0x80) + return 0; + /* FALLTHROUGH */ +#endif + case OP_PERIOD: + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return 0; + break; + + default: + return 0; + } + + if (node->constraint) + { + /* The node has constraints. Check whether the current context + satisfies the constraints. */ + unsigned int context = re_string_context_at (&mctx->input, idx, + mctx->eflags); + if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) + return 0; + } + + return 1; +} + +/* Extend the buffers, if the buffers have run out. */ + +static reg_errcode_t +internal_function +extend_buffers (re_match_context_t *mctx) +{ + reg_errcode_t ret; + re_string_t *pstr = &mctx->input; + + /* Avoid overflow. */ + if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) + return REG_ESPACE; + + /* Double the lengthes of the buffers. 
*/ + ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + if (BE (ret != REG_NOERROR, 0)) + return ret; + + if (mctx->state_log != NULL) + { + /* And double the length of state_log. */ + /* XXX We have no indication of the size of this buffer. If this + allocation fail we have no indication that the state_log array + does not have the right size. */ + re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len + 1); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->state_log = new_array; + } + + /* Then reconstruct the buffers. */ + if (pstr->icase) + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + { + ret = build_wcs_upper_buffer (pstr); + if (BE (ret != REG_NOERROR, 0)) + return ret; + } + else +#endif /* RE_ENABLE_I18N */ + build_upper_buffer (pstr); + } + else + { +#ifdef RE_ENABLE_I18N + if (pstr->mb_cur_max > 1) + build_wcs_buffer (pstr); + else +#endif /* RE_ENABLE_I18N */ + { + if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + } + return REG_NOERROR; +} + + +/* Functions for matching context. */ + +/* Initialize MCTX. */ + +static reg_errcode_t +internal_function +match_ctx_init (re_match_context_t *mctx, int eflags, int n) +{ + mctx->eflags = eflags; + mctx->match_last = -1; + if (n > 0) + { + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); + mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); + if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) + return REG_ESPACE; + } + /* Already zero-ed by the caller. + else + mctx->bkref_ents = NULL; + mctx->nbkref_ents = 0; + mctx->nsub_tops = 0; */ + mctx->abkref_ents = n; + mctx->max_mb_elem_len = 1; + mctx->asub_tops = n; + return REG_NOERROR; +} + +/* Clean the entries which depend on the current input in MCTX. + This function must be invoked when the matcher changes the start index + of the input, or changes the input string. 
*/ + +static void +internal_function +match_ctx_clean (re_match_context_t *mctx) +{ + int st_idx; + for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) + { + int sl_idx; + re_sub_match_top_t *top = mctx->sub_tops[st_idx]; + for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) + { + re_sub_match_last_t *last = top->lasts[sl_idx]; + re_free (last->path.array); + re_free (last); + } + re_free (top->lasts); + if (top->path) + { + re_free (top->path->array); + re_free (top->path); + } + free (top); + } + + mctx->nsub_tops = 0; + mctx->nbkref_ents = 0; +} + +/* Free all the memory associated with MCTX. */ + +static void +internal_function +match_ctx_free (re_match_context_t *mctx) +{ + /* First, free all the memory associated with MCTX->SUB_TOPS. */ + match_ctx_clean (mctx); + re_free (mctx->sub_tops); + re_free (mctx->bkref_ents); +} + +/* Add a new backreference entry to MCTX. + Note that we assume that caller never call this function with duplicate + entry, and call with STR_IDX which isn't smaller than any existing entry. 
+*/ + +static reg_errcode_t +internal_function +match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, + int to) +{ + if (mctx->nbkref_ents >= mctx->abkref_ents) + { + struct re_backref_cache_entry* new_entry; + new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, + mctx->abkref_ents * 2); + if (BE (new_entry == NULL, 0)) + { + re_free (mctx->bkref_ents); + return REG_ESPACE; + } + mctx->bkref_ents = new_entry; + memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', + sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); + mctx->abkref_ents *= 2; + } + if (mctx->nbkref_ents > 0 + && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) + mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; + + mctx->bkref_ents[mctx->nbkref_ents].node = node; + mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; + mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; + mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; + + /* This is a cache that saves negative results of check_dst_limits_calc_pos. + If bit N is clear, means that this entry won't epsilon-transition to + an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If + it is set, check_dst_limits_calc_pos_1 will recurse and try to find one + such node. + + A backreference does not epsilon-transition unless it is empty, so set + to all zeros if FROM != TO. */ + mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map + = (from == to ? ~0 : 0); + + mctx->bkref_ents[mctx->nbkref_ents++].more = 0; + if (mctx->max_mb_elem_len < to - from) + mctx->max_mb_elem_len = to - from; + return REG_NOERROR; +} + +/* Search for the first entry which has the same str_idx, or -1 if none is + found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. 
*/ + +static int +internal_function +search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) +{ + int left, right, mid, last; + last = right = mctx->nbkref_ents; + for (left = 0; left < right;) + { + mid = (left + right) / 2; + if (mctx->bkref_ents[mid].str_idx < str_idx) + left = mid + 1; + else + right = mid; + } + if (left < last && mctx->bkref_ents[left].str_idx == str_idx) + return left; + else + return -1; +} + +/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches + at STR_IDX. */ + +static reg_errcode_t +internal_function +match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) +{ +#ifdef DEBUG + assert (mctx->sub_tops != NULL); + assert (mctx->asub_tops > 0); +#endif + if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) + { + int new_asub_tops = mctx->asub_tops * 2; + re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, + re_sub_match_top_t *, + new_asub_tops); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops = new_array; + mctx->asub_tops = new_asub_tops; + } + mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); + if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) + return REG_ESPACE; + mctx->sub_tops[mctx->nsub_tops]->node = node; + mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; + return REG_NOERROR; +} + +/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches + at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. 
*/ + +static re_sub_match_last_t * +internal_function +match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) +{ + re_sub_match_last_t *new_entry; + if (BE (subtop->nlasts == subtop->alasts, 0)) + { + int new_alasts = 2 * subtop->alasts + 1; + re_sub_match_last_t **new_array = re_realloc (subtop->lasts, + re_sub_match_last_t *, + new_alasts); + if (BE (new_array == NULL, 0)) + return NULL; + subtop->lasts = new_array; + subtop->alasts = new_alasts; + } + new_entry = calloc (1, sizeof (re_sub_match_last_t)); + if (BE (new_entry != NULL, 1)) + { + subtop->lasts[subtop->nlasts] = new_entry; + new_entry->node = node; + new_entry->str_idx = str_idx; + ++subtop->nlasts; + } + return new_entry; +} + +static void +internal_function +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, int last_str_idx) +{ + sctx->sifted_states = sifted_sts; + sctx->limited_states = limited_sts; + sctx->last_node = last_node; + sctx->last_str_idx = last_str_idx; + re_node_set_init_empty (&sctx->limits); +} diff --git a/compat/strtok_r.c b/compat/strtok_r.c new file mode 100644 index 0000000000..7b5d568a96 --- /dev/null +++ b/compat/strtok_r.c @@ -0,0 +1,61 @@ +/* Reentrant string tokenizer. Generic version. + Copyright (C) 1991,1996-1999,2001,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "../git-compat-util.h" + +/* Parse S into tokens separated by characters in DELIM. + If S is NULL, the saved pointer in SAVE_PTR is used as + the next starting point. For example: + char s[] = "-abc-=-def"; + char *sp; + x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def" + x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL + x = strtok_r(NULL, "=", &sp); // x = NULL + // s = "abc\0-def\0" +*/ +char * +gitstrtok_r (char *s, const char *delim, char **save_ptr) +{ + char *token; + + if (s == NULL) + s = *save_ptr; + + /* Scan leading delimiters. */ + s += strspn (s, delim); + if (*s == '\0') + { + *save_ptr = s; + return NULL; + } + + /* Find the end of the token. */ + token = s; + s = strpbrk (token, delim); + if (s == NULL) + /* This token finishes the string. */ + *save_ptr = token + strlen (token); + else + { + /* Terminate the token and make *SAVE_PTR point past it. */ + *s = '\0'; + *save_ptr = s + 1; + } + return token; +} diff --git a/config.mak.in b/config.mak.in index b4e65c32b2..a0c34eec15 100644 --- a/config.mak.in +++ b/config.mak.in @@ -46,6 +46,7 @@ NO_IPV6=@NO_IPV6@ NO_C99_FORMAT=@NO_C99_FORMAT@ NO_HSTRERROR=@NO_HSTRERROR@ NO_STRCASESTR=@NO_STRCASESTR@ +NO_STRTOK_R=@NO_STRTOK_R@ NO_MEMMEM=@NO_MEMMEM@ NO_STRLCPY=@NO_STRLCPY@ NO_UINTMAX_T=@NO_UINTMAX_T@ @@ -58,6 +59,7 @@ NO_INET_NTOP=@NO_INET_NTOP@ NO_INET_PTON=@NO_INET_PTON@ NO_ICONV=@NO_ICONV@ OLD_ICONV=@OLD_ICONV@ +NO_REGEX=@NO_REGEX@ NO_DEFLATE_BOUND=@NO_DEFLATE_BOUND@ INLINE=@INLINE@ SOCKLEN_T=@SOCKLEN_T@ diff --git a/configure.ac b/configure.ac index 5601e8bac9..56731c35c9 100644 --- a/configure.ac +++ b/configure.ac @@ -706,6 +706,27 @@ else fi AC_SUBST(NO_C99_FORMAT) # +# Define NO_REGEX if you have no or inferior regex support in your C library. 
+AC_CACHE_CHECK([whether the platform regex can handle null bytes], + [ac_cv_c_excellent_regex], [ +AC_EGREP_CPP(yippeeyeswehaveit, + AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT +#include <regex.h> +], +[#ifdef REG_STARTEND +yippeeyeswehaveit +#endif +]), + [ac_cv_c_excellent_regex=yes], + [ac_cv_c_excellent_regex=no]) +]) +if test $ac_cv_c_excellent_regex = yes; then + NO_REGEX= +else + NO_REGEX=YesPlease +fi +AC_SUBST(NO_REGEX) +# # Define FREAD_READS_DIRECTORIES if your are on a system which succeeds # when attempting to read from an fopen'ed directory. AC_CACHE_CHECK([whether system succeeds to read fopen'ed directory], @@ -783,6 +804,12 @@ GIT_CHECK_FUNC(strcasestr, [NO_STRCASESTR=YesPlease]) AC_SUBST(NO_STRCASESTR) # +# Define NO_STRTOK_R if you don't have strtok_r +GIT_CHECK_FUNC(strtok_r, +[NO_STRTOK_R=], +[NO_STRTOK_R=YesPlease]) +AC_SUBST(NO_STRTOK_R) +# # Define NO_MEMMEM if you don't have memmem. GIT_CHECK_FUNC(memmem, [NO_MEMMEM=], diff --git a/contrib/examples/git-merge.sh b/contrib/examples/git-merge.sh index 8f617fcb70..7b922c3948 100755 --- a/contrib/examples/git-merge.sh +++ b/contrib/examples/git-merge.sh @@ -15,7 +15,10 @@ log add list of one-line log to merge commit message squash create a single commit instead of doing a merge commit perform a commit if the merge succeeds (default) ff allow fast-forward (default) +ff-only abort if fast-forward is not possible +rerere-autoupdate update index with any reused conflict resolution s,strategy= merge strategy to use +X= option for selected merge strategy m,message= message to be used for the merge commit (if any) " @@ -25,26 +28,32 @@ require_work_tree cd_to_toplevel test -z "$(git ls-files -u)" || - die "You are in the middle of a conflicted merge." + die "Merge is not possible because you have unmerged files." + +! test -e "$GIT_DIR/MERGE_HEAD" || + die 'You have not concluded your merge (MERGE_HEAD exists).' 
LF=' ' all_strategies='recur recursive octopus resolve stupid ours subtree' all_strategies="$all_strategies recursive-ours recursive-theirs" +not_strategies='base file index tree' default_twohead_strategies='recursive' default_octopus_strategies='octopus' no_fast_forward_strategies='subtree ours' no_trivial_strategies='recursive recur subtree ours recursive-ours recursive-theirs' use_strategies= +xopt= allow_fast_forward=t +fast_forward_only= allow_trivial_merge=t -squash= no_commit= log_arg= +squash= no_commit= log_arg= rr_arg= dropsave() { rm -f -- "$GIT_DIR/MERGE_HEAD" "$GIT_DIR/MERGE_MSG" \ - "$GIT_DIR/MERGE_STASH" || exit 1 + "$GIT_DIR/MERGE_STASH" "$GIT_DIR/MERGE_MODE" || exit 1 } savestate() { @@ -131,21 +140,34 @@ finish () { merge_name () { remote="$1" rh=$(git rev-parse --verify "$remote^0" 2>/dev/null) || return - bh=$(git show-ref -s --verify "refs/heads/$remote" 2>/dev/null) - if test "$rh" = "$bh" - then - echo "$rh branch '$remote' of ." - elif truname=$(expr "$remote" : '\(.*\)~[1-9][0-9]*$') && + if truname=$(expr "$remote" : '\(.*\)~[0-9]*$') && git show-ref -q --verify "refs/heads/$truname" 2>/dev/null then echo "$rh branch '$truname' (early part) of ." - elif test "$remote" = "FETCH_HEAD" -a -r "$GIT_DIR/FETCH_HEAD" + return + fi + if found_ref=$(git rev-parse --symbolic-full-name --verify \ + "$remote" 2>/dev/null) + then + expanded=$(git check-ref-format --branch "$remote") || + exit + if test "${found_ref#refs/heads/}" != "$found_ref" + then + echo "$rh branch '$expanded' of ." + return + elif test "${found_ref#refs/remotes/}" != "$found_ref" + then + echo "$rh remote branch '$expanded' of ." 
+ return + fi + fi + if test "$remote" = "FETCH_HEAD" -a -r "$GIT_DIR/FETCH_HEAD" then sed -e 's/ not-for-merge / /' -e 1q \ "$GIT_DIR/FETCH_HEAD" - else - echo "$rh commit '$remote'" + return fi + echo "$rh commit '$remote'" } parse_config () { @@ -172,16 +194,36 @@ parse_config () { --no-ff) test "$squash" != t || die "You cannot combine --squash with --no-ff." + test "$fast_forward_only" != t || + die "You cannot combine --ff-only with --no-ff." allow_fast_forward=f ;; + --ff-only) + test "$allow_fast_forward" != f || + die "You cannot combine --ff-only with --no-ff." + fast_forward_only=t ;; + --rerere-autoupdate|--no-rerere-autoupdate) + rr_arg=$1 ;; -s|--strategy) shift case " $all_strategies " in *" $1 "*) - use_strategies="$use_strategies$1 " ;; + use_strategies="$use_strategies$1 " + ;; *) - die "available strategies are: $all_strategies" ;; + case " $not_strategies " in + *" $1 "*) + false + esac && + type "git-merge-$1" >/dev/null 2>&1 || + die "available strategies are: $all_strategies" + use_strategies="$use_strategies$1 " + ;; esac ;; + -X) + shift + xopt="${xopt:+$xopt }$(git rev-parse --sq-quote "--$1")" + ;; -m|--message) shift merge_msg="$1" @@ -245,6 +287,10 @@ then exit 1 fi + test "$squash" != t || + die "Squash commit into empty head not supported yet" + test "$allow_fast_forward" = t || + die "Non-fast-forward into an empty head does not make sense" rh=$(git rev-parse --verify "$1^0") || die "$1 - not something we can merge" @@ -261,12 +307,18 @@ else # the given message. If remote is invalid we will die # later in the common codepath so we discard the error # in this loop. 
- merge_name=$(for remote + merge_msg="$( + for remote do merge_name "$remote" - done | git fmt-merge-msg $log_arg - ) - merge_msg="${merge_msg:+$merge_msg$LF$LF}$merge_name" + done | + if test "$have_message" = t + then + git fmt-merge-msg -m "$merge_msg" $log_arg + else + git fmt-merge-msg $log_arg + fi + )" fi head=$(git rev-parse --verify "$head_arg"^0) || usage @@ -335,7 +387,7 @@ case "$#" in common=$(git merge-base --all $head "$@") ;; *) - common=$(git show-branch --merge-base $head "$@") + common=$(git merge-base --all --octopus $head "$@") ;; esac echo "$head" >"$GIT_DIR/ORIG_HEAD" @@ -373,8 +425,8 @@ t,1,"$head",*) # We are not doing octopus, not fast-forward, and have only # one common. git update-index --refresh 2>/dev/null - case "$allow_trivial_merge" in - t) + case "$allow_trivial_merge,$fast_forward_only" in + t,) # See if it is really trivial. git var GIT_COMMITTER_IDENT >/dev/null || exit echo "Trying really trivial in-index merge..." @@ -413,6 +465,11 @@ t,1,"$head",*) ;; esac +if test "$fast_forward_only" = t +then + die "Not possible to fast-forward, aborting." +fi + # We are going to make a new commit. git var GIT_COMMITTER_IDENT >/dev/null || exit @@ -451,7 +508,7 @@ do # Remember which strategy left the state in the working tree wt_strategy=$strategy - git-merge-$strategy $common -- "$head_arg" "$@" + eval 'git-merge-$strategy '"$xopt"' $common -- "$head_arg" "$@"' exit=$? 
if test "$no_commit" = t && test "$exit" = 0 then @@ -489,9 +546,9 @@ if test '' != "$result_tree" then if test "$allow_fast_forward" = "t" then - parents=$(git show-branch --independent "$head" "$@") + parents=$(git merge-base --independent "$head" "$@") else - parents=$(git rev-parse "$head" "$@") + parents=$(git rev-parse "$head" "$@") fi parents=$(echo "$parents" | sed -e 's/^/-p /') result_commit=$(printf '%s\n' "$merge_msg" | git commit-tree $result_tree $parents) || exit @@ -533,7 +590,15 @@ else do echo $remote done >"$GIT_DIR/MERGE_HEAD" - printf '%s\n' "$merge_msg" >"$GIT_DIR/MERGE_MSG" + printf '%s\n' "$merge_msg" >"$GIT_DIR/MERGE_MSG" || + die "Could not write to $GIT_DIR/MERGE_MSG" + if test "$allow_fast_forward" != t + then + printf "%s" no-ff + else + : + fi >"$GIT_DIR/MERGE_MODE" || + die "Could not write to $GIT_DIR/MERGE_MODE" fi if test "$merge_was_ok" = t @@ -550,6 +615,6 @@ Conflicts: sed -e 's/^[^ ]* / /' | uniq } >>"$GIT_DIR/MERGE_MSG" - git rerere + git rerere $rr_arg die "Automatic merge failed; fix conflicts and then commit the result." fi diff --git a/contrib/examples/git-revert.sh b/contrib/examples/git-revert.sh index 49f00321b2..60a05a8b97 100755 --- a/contrib/examples/git-revert.sh +++ b/contrib/examples/git-revert.sh @@ -181,7 +181,6 @@ Conflicts: esac exit 1 } -echo >&2 "Finished one $me." # If we are cherry-pick, and if the merge did not result in # hand-editing, we will hit this commit and inherit the original diff --git a/contrib/hooks/post-receive-email b/contrib/hooks/post-receive-email index 09c524105c..0085086437 100755 --- a/contrib/hooks/post-receive-email +++ b/contrib/hooks/post-receive-email @@ -55,6 +55,11 @@ # "t=%s; printf 'http://.../?id=%%s' \$t; echo;echo; git show -C \$t; echo" # Be careful if "..." contains things that will be expanded by shell "eval" # or printf. +# hooks.emailmaxlines +# The maximum number of lines that should be included in the generated +# email body. If not specified, there is no limit. 
+# Lines beyond the limit are suppressed and counted, and a final +# line is added indicating the number of suppressed lines. # # Notes # ----- @@ -84,6 +89,7 @@ generate_email() oldrev=$(git rev-parse $1) newrev=$(git rev-parse $2) refname="$3" + maxlines=$4 # --- Interpret # 0000->1234 (create) @@ -192,7 +198,12 @@ generate_email() fn_name=atag ;; esac - generate_${change_type}_${fn_name}_email + + if [ -z "$maxlines" ]; then + generate_${change_type}_${fn_name}_email + else + generate_${change_type}_${fn_name}_email | limit_lines $maxlines + fi generate_email_footer } @@ -642,6 +653,24 @@ show_new_revisions() } +limit_lines() +{ + lines=0 + skipped=0 + while IFS="" read -r line; do + lines=$((lines + 1)) + if [ $lines -gt $1 ]; then + skipped=$((skipped + 1)) + else + printf "%s\n" "$line" + fi + done + if [ $skipped -ne 0 ]; then + echo "... $skipped lines suppressed ..." + fi +} + + send_mail() { if [ -n "$envelopesender" ]; then @@ -679,6 +708,7 @@ announcerecipients=$(git config hooks.announcelist) envelopesender=$(git config hooks.envelopesender) emailprefix=$(git config hooks.emailprefix || echo '[SCM] ') custom_showrev=$(git config hooks.showrev) +maxlines=$(git config hooks.emailmaxlines) # --- Main loop # Allow dual mode: run from the command line just like the update hook, or @@ -691,6 +721,6 @@ if [ -n "$1" -a -n "$2" -a -n "$3" ]; then else while read oldrev newrev refname do - generate_email $oldrev $newrev $refname | send_mail + generate_email $oldrev $newrev $refname $maxlines | send_mail done fi diff --git a/contrib/svn-fe/svn-fe.c b/contrib/svn-fe/svn-fe.c index e9b9ba4da4..a2677b03e0 100644 --- a/contrib/svn-fe/svn-fe.c +++ b/contrib/svn-fe/svn-fe.c @@ -10,6 +10,7 @@ int main(int argc, char **argv) { svndump_init(NULL); svndump_read((argc > 1) ? 
argv[1] : NULL); + svndump_deinit(); svndump_reset(); return 0; } diff --git a/contrib/svn-fe/svn-fe.txt b/contrib/svn-fe/svn-fe.txt index de30f83a1f..35f84bd9e7 100644 --- a/contrib/svn-fe/svn-fe.txt +++ b/contrib/svn-fe/svn-fe.txt @@ -12,7 +12,7 @@ svnadmin dump --incremental REPO | svn-fe [url] | git fast-import DESCRIPTION ----------- -Converts a Subversion dumpfile (version: 2) into input suitable for +Converts a Subversion dumpfile into input suitable for git-fast-import(1) and similar importers. REPO is a path to a Subversion repository mirrored on the local disk. Remote Subversion repositories can be mirrored on local disk using the `svnsync` @@ -25,6 +25,9 @@ Subversion's repository dump format is documented in full in Files in this format can be generated using the 'svnadmin dump' or 'svk admin dump' command. +Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3) +are not supported. + OUTPUT FORMAT ------------- The fast-import format is documented by the git-fast-import(1) @@ -43,11 +46,9 @@ user <user@UUID> as committer, where 'user' is the value of the `svn:author` property and 'UUID' the repository's identifier. -To support incremental imports, 'svn-fe' will put a `git-svn-id` -line at the end of each commit log message if passed an url on the -command line. This line has the form `git-svn-id: URL@REVNO UUID`. - -Empty directories and unknown properties are silently discarded. +To support incremental imports, 'svn-fe' puts a `git-svn-id` line at +the end of each commit log message if passed an url on the command +line. This line has the form `git-svn-id: URL@REVNO UUID`. The resulting repository will generally require further processing to put each project in its own repository and to separate the history @@ -56,9 +57,9 @@ may be useful for this purpose. BUGS ---- -Litters the current working directory with .bin files for -persistence. Will be fixed when the svn-fe infrastructure is aware of -a Git working directory. 
+Empty directories and unknown properties are silently discarded. + +The exit status does not reflect whether an error was detected. SEE ALSO -------- @@ -93,7 +93,8 @@ static int is_binary(unsigned long size, struct text_stat *stats) return 0; } -static enum eol determine_output_conversion(enum action action) { +static enum eol determine_output_conversion(enum action action) +{ switch (action) { case CRLF_BINARY: return EOL_UNSET; @@ -693,7 +694,8 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check) return !!ATTR_TRUE(value); } -enum action determine_action(enum action text_attr, enum eol eol_attr) { +static enum action determine_action(enum action text_attr, enum eol eol_attr) +{ if (text_attr == CRLF_BINARY) return CRLF_BINARY; if (eol_attr == EOL_LF) @@ -739,7 +741,9 @@ int convert_to_git(const char *path, const char *src, size_t len, return ret | ident_to_git(path, src, len, dst, ident); } -int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst) +static int convert_to_working_tree_internal(const char *path, const char *src, + size_t len, struct strbuf *dst, + int normalizing) { struct git_attr_check check[5]; enum action action = CRLF_GUESS; @@ -765,11 +769,32 @@ int convert_to_working_tree(const char *path, const char *src, size_t len, struc src = dst->buf; len = dst->len; } - action = determine_action(action, eol_attr); - ret |= crlf_to_worktree(path, src, len, dst, action); + /* + * CRLF conversion can be skipped if normalizing, unless there + * is a smudge filter. The filter might expect CRLFs. 
+ */ + if (filter || !normalizing) { + action = determine_action(action, eol_attr); + ret |= crlf_to_worktree(path, src, len, dst, action); + if (ret) { + src = dst->buf; + len = dst->len; + } + } + return ret | apply_filter(path, src, len, dst, filter); +} + +int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst) +{ + return convert_to_working_tree_internal(path, src, len, dst, 0); +} + +int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst) +{ + int ret = convert_to_working_tree_internal(path, src, len, dst, 1); if (ret) { src = dst->buf; len = dst->len; } - return ret | apply_filter(path, src, len, dst, filter); + return ret | convert_to_git(path, src, len, dst, 0); } @@ -586,7 +586,7 @@ static int date_string(unsigned long date, int offset, char *buf, int len) /* Gr. strptime is crap for this; it doesn't have a way to require RFC2822 (i.e. English) day/month names, and it doesn't work correctly with %z. */ -int parse_date_toffset(const char *date, unsigned long *timestamp, int *offset) +int parse_date_basic(const char *date, unsigned long *timestamp, int *offset) { struct tm tm; int tm_gmt; @@ -642,17 +642,16 @@ int parse_date_toffset(const char *date, unsigned long *timestamp, int *offset) if (!tm_gmt) *timestamp -= *offset * 60; - return 1; /* success */ + return 0; /* success */ } int parse_date(const char *date, char *result, int maxlen) { unsigned long timestamp; int offset; - if (parse_date_toffset(date, ×tamp, &offset) > 0) - return date_string(timestamp, offset, result, maxlen); - else + if (parse_date_basic(date, ×tamp, &offset)) return -1; + return date_string(timestamp, offset, result, maxlen); } enum date_mode parse_date_format(const char *format) @@ -1004,9 +1003,8 @@ unsigned long approxidate_relative(const char *date, const struct timeval *tv) int offset; int errors = 0; - if (parse_date_toffset(date, ×tamp, &offset) > 0) + if (!parse_date_basic(date, ×tamp, &offset)) 
return timestamp; - return approxidate_str(date, tv, &errors); } @@ -1019,7 +1017,7 @@ unsigned long approxidate_careful(const char *date, int *error_ret) if (!error_ret) error_ret = &dummy; - if (parse_date_toffset(date, ×tamp, &offset) > 0) { + if (!parse_date_basic(date, ×tamp, &offset)) { *error_ret = 0; return timestamp; } diff --git a/diff-lib.c b/diff-lib.c index 8b8978ae6d..392ce2bef0 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -68,11 +68,16 @@ static int match_stat_with_submodule(struct diff_options *diffopt, unsigned ce_option, unsigned *dirty_submodule) { int changed = ce_match_stat(ce, st, ce_option); - if (S_ISGITLINK(ce->ce_mode) - && !DIFF_OPT_TST(diffopt, IGNORE_SUBMODULES) - && !DIFF_OPT_TST(diffopt, IGNORE_DIRTY_SUBMODULES) - && (!changed || DIFF_OPT_TST(diffopt, DIRTY_SUBMODULES))) { - *dirty_submodule = is_submodule_modified(ce->name, DIFF_OPT_TST(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES)); + if (S_ISGITLINK(ce->ce_mode)) { + unsigned orig_flags = diffopt->flags; + if (!DIFF_OPT_TST(diffopt, OVERRIDE_SUBMODULE_CONFIG)) + set_diffopt_flags_from_submodule_config(diffopt, ce->name); + if (DIFF_OPT_TST(diffopt, IGNORE_SUBMODULES)) + changed = 0; + else if (!DIFF_OPT_TST(diffopt, IGNORE_DIRTY_SUBMODULES) + && (!changed || DIFF_OPT_TST(diffopt, DIRTY_SUBMODULES))) + *dirty_submodule = is_submodule_modified(ce->name, DIFF_OPT_TST(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES)); + diffopt->flags = orig_flags; } return changed; } diff --git a/diff-no-index.c b/diff-no-index.c index 43aeeba2e0..ce9e783407 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -64,7 +64,8 @@ static int queue_diff(struct diff_options *o, if (S_ISDIR(mode1) || S_ISDIR(mode2)) { char buffer1[PATH_MAX], buffer2[PATH_MAX]; - struct string_list p1 = {NULL, 0, 0, 1}, p2 = {NULL, 0, 0, 1}; + struct string_list p1 = STRING_LIST_INIT_DUP; + struct string_list p2 = STRING_LIST_INIT_DUP; int len1 = 0, len2 = 0, i1, i2, ret = 0; if (name1 && read_directory(name1, &p1)) @@ -31,6 +31,7 @@ static 
const char *external_diff_cmd_cfg; int diff_auto_refresh_index = 1; static int diff_mnemonic_prefix; static int diff_no_prefix; +static struct diff_options default_diff_options; static char diff_colors[][COLOR_MAXLEN] = { GIT_COLOR_RESET, @@ -107,6 +108,9 @@ int git_diff_ui_config(const char *var, const char *value, void *cb) if (!strcmp(var, "diff.wordregex")) return git_config_string(&diff_word_regex_cfg, var, value); + if (!strcmp(var, "diff.ignoresubmodules")) + handle_ignore_submodules_arg(&default_diff_options, value); + return git_diff_basic_config(var, value, cb); } @@ -141,6 +145,9 @@ int git_diff_basic_config(const char *var, const char *value, void *cb) return 0; } + if (!prefixcmp(var, "submodule.")) + return parse_submodule_config_option(var, value); + return git_color_default_config(var, value, cb); } @@ -2819,7 +2826,7 @@ static void run_checkdiff(struct diff_filepair *p, struct diff_options *o) void diff_setup(struct diff_options *options) { - memset(options, 0, sizeof(*options)); + memcpy(options, &default_diff_options, sizeof(*options)); options->file = stdout; @@ -2995,9 +3002,100 @@ static int opt_arg(const char *arg, int arg_short, const char *arg_long, int *va static int diff_scoreopt_parse(const char *opt); +static inline int short_opt(char opt, const char **argv, + const char **optarg) +{ + const char *arg = argv[0]; + if (arg[0] != '-' || arg[1] != opt) + return 0; + if (arg[2] != '\0') { + *optarg = arg + 2; + return 1; + } + if (!argv[1]) + die("Option '%c' requires a value", opt); + *optarg = argv[1]; + return 2; +} + +int parse_long_opt(const char *opt, const char **argv, + const char **optarg) +{ + const char *arg = argv[0]; + if (arg[0] != '-' || arg[1] != '-') + return 0; + arg += strlen("--"); + if (prefixcmp(arg, opt)) + return 0; + arg += strlen(opt); + if (*arg == '=') { /* sticked form: --option=value */ + *optarg = arg + 1; + return 1; + } + if (*arg != '\0') + return 0; + /* separate form: --option value */ + if (!argv[1]) + 
die("Option '--%s' requires a value", opt); + *optarg = argv[1]; + return 2; +} + +static int stat_opt(struct diff_options *options, const char **av) +{ + const char *arg = av[0]; + char *end; + int width = options->stat_width; + int name_width = options->stat_name_width; + int argcount = 1; + + arg += strlen("--stat"); + end = (char *)arg; + + switch (*arg) { + case '-': + if (!prefixcmp(arg, "-width")) { + arg += strlen("-width"); + if (*arg == '=') + width = strtoul(arg + 1, &end, 10); + else if (!*arg && !av[1]) + die("Option '--stat-width' requires a value"); + else if (!*arg) { + width = strtoul(av[1], &end, 10); + argcount = 2; + } + } else if (!prefixcmp(arg, "-name-width")) { + arg += strlen("-name-width"); + if (*arg == '=') + name_width = strtoul(arg + 1, &end, 10); + else if (!*arg && !av[1]) + die("Option '--stat-name-width' requires a value"); + else if (!*arg) { + name_width = strtoul(av[1], &end, 10); + argcount = 2; + } + } + break; + case '=': + width = strtoul(arg+1, &end, 10); + if (*end == ',') + name_width = strtoul(end+1, &end, 10); + } + + /* Important! This checks all the error cases! 
*/ + if (*end) + return 0; + options->output_format |= DIFF_FORMAT_DIFFSTAT; + options->stat_name_width = name_width; + options->stat_width = width; + return argcount; +} + int diff_opt_parse(struct diff_options *options, const char **av, int ac) { const char *arg = av[0]; + const char *optarg; + int argcount; /* Output format options */ if (!strcmp(arg, "-p") || !strcmp(arg, "-u") || !strcmp(arg, "--patch")) @@ -3034,33 +3132,9 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) options->output_format |= DIFF_FORMAT_NAME_STATUS; else if (!strcmp(arg, "-s")) options->output_format |= DIFF_FORMAT_NO_OUTPUT; - else if (!prefixcmp(arg, "--stat")) { - char *end; - int width = options->stat_width; - int name_width = options->stat_name_width; - arg += 6; - end = (char *)arg; - - switch (*arg) { - case '-': - if (!prefixcmp(arg, "-width=")) - width = strtoul(arg + 7, &end, 10); - else if (!prefixcmp(arg, "-name-width=")) - name_width = strtoul(arg + 12, &end, 10); - break; - case '=': - width = strtoul(arg+1, &end, 10); - if (*end == ',') - name_width = strtoul(end+1, &end, 10); - } - - /* Important! This checks all the error cases! 
*/ - if (*end) - return 0; - options->output_format |= DIFF_FORMAT_DIFFSTAT; - options->stat_name_width = name_width; - options->stat_width = width; - } + else if (!prefixcmp(arg, "--stat")) + /* --stat, --stat-width, or --stat-name-width */ + return stat_opt(options, av); /* renames options */ else if (!prefixcmp(arg, "-B")) { @@ -3154,10 +3228,11 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) else die("bad --word-diff argument: %s", type); } - else if (!prefixcmp(arg, "--word-diff-regex=")) { + else if ((argcount = parse_long_opt("word-diff-regex", av, &optarg))) { if (options->word_diff == DIFF_WORDS_NONE) options->word_diff = DIFF_WORDS_PLAIN; - options->word_regex = arg + 18; + options->word_regex = optarg; + return argcount; } else if (!strcmp(arg, "--exit-code")) DIFF_OPT_SET(options, EXIT_WITH_STATUS); @@ -3171,11 +3246,13 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) DIFF_OPT_SET(options, ALLOW_TEXTCONV); else if (!strcmp(arg, "--no-textconv")) DIFF_OPT_CLR(options, ALLOW_TEXTCONV); - else if (!strcmp(arg, "--ignore-submodules")) + else if (!strcmp(arg, "--ignore-submodules")) { + DIFF_OPT_SET(options, OVERRIDE_SUBMODULE_CONFIG); handle_ignore_submodules_arg(options, "all"); - else if (!prefixcmp(arg, "--ignore-submodules=")) + } else if (!prefixcmp(arg, "--ignore-submodules=")) { + DIFF_OPT_SET(options, OVERRIDE_SUBMODULE_CONFIG); handle_ignore_submodules_arg(options, arg + 20); - else if (!strcmp(arg, "--submodule")) + } else if (!strcmp(arg, "--submodule")) DIFF_OPT_SET(options, SUBMODULE_LOG); else if (!prefixcmp(arg, "--submodule=")) { if (!strcmp(arg + 12, "log")) @@ -3185,18 +3262,26 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) /* misc options */ else if (!strcmp(arg, "-z")) options->line_termination = 0; - else if (!prefixcmp(arg, "-l")) - options->rename_limit = strtoul(arg+2, NULL, 10); - else if (!prefixcmp(arg, "-S")) - options->pickaxe = arg + 2; + 
else if ((argcount = short_opt('l', av, &optarg))) { + options->rename_limit = strtoul(optarg, NULL, 10); + return argcount; + } + else if ((argcount = short_opt('S', av, &optarg))) { + options->pickaxe = optarg; + return argcount; + } else if (!strcmp(arg, "--pickaxe-all")) options->pickaxe_opts = DIFF_PICKAXE_ALL; else if (!strcmp(arg, "--pickaxe-regex")) options->pickaxe_opts = DIFF_PICKAXE_REGEX; - else if (!prefixcmp(arg, "-O")) - options->orderfile = arg + 2; - else if (!prefixcmp(arg, "--diff-filter=")) - options->filter = arg + 14; + else if ((argcount = short_opt('O', av, &optarg))) { + options->orderfile = optarg; + return argcount; + } + else if ((argcount = parse_long_opt("diff-filter", av, &optarg))) { + options->filter = optarg; + return argcount; + } else if (!strcmp(arg, "--abbrev")) options->abbrev = DEFAULT_ABBREV; else if (!prefixcmp(arg, "--abbrev=")) { @@ -3206,20 +3291,25 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) else if (40 < options->abbrev) options->abbrev = 40; } - else if (!prefixcmp(arg, "--src-prefix=")) - options->a_prefix = arg + 13; - else if (!prefixcmp(arg, "--dst-prefix=")) - options->b_prefix = arg + 13; + else if ((argcount = parse_long_opt("src-prefix", av, &optarg))) { + options->a_prefix = optarg; + return argcount; + } + else if ((argcount = parse_long_opt("dst-prefix", av, &optarg))) { + options->b_prefix = optarg; + return argcount; + } else if (!strcmp(arg, "--no-prefix")) options->a_prefix = options->b_prefix = ""; else if (opt_arg(arg, '\0', "inter-hunk-context", &options->interhunkcontext)) ; - else if (!prefixcmp(arg, "--output=")) { - options->file = fopen(arg + strlen("--output="), "w"); + else if ((argcount = parse_long_opt("output", av, &optarg))) { + options->file = fopen(optarg, "w"); if (!options->file) die_errno("Could not open '%s'", arg + strlen("--output=")); options->close_file = 1; + return argcount; } else return 0; return 1; @@ -3763,6 +3853,13 @@ static int 
diff_get_patch_id(struct diff_options *options, unsigned char *sha1) len2, p->two->path); git_SHA1_Update(&ctx, buffer, len1); + if (diff_filespec_is_binary(p->one) || + diff_filespec_is_binary(p->two)) { + git_SHA1_Update(&ctx, sha1_to_hex(p->one->sha1), 40); + git_SHA1_Update(&ctx, sha1_to_hex(p->two->sha1), 40); + continue; + } + xpp.flags = 0; xecfg.ctxlen = 3; xecfg.flags = XDL_EMIT_FUNCNAMES; @@ -4107,6 +4204,24 @@ int diff_result_code(struct diff_options *opt, int status) return result; } +/* + * Shall changes to this submodule be ignored? + * + * Submodule changes can be configured to be ignored separately for each path, + * but that configuration can be overridden from the command line. + */ +static int is_submodule_ignored(const char *path, struct diff_options *options) +{ + int ignored = 0; + unsigned orig_flags = options->flags; + if (!DIFF_OPT_TST(options, OVERRIDE_SUBMODULE_CONFIG)) + set_diffopt_flags_from_submodule_config(options, path); + if (DIFF_OPT_TST(options, IGNORE_SUBMODULES)) + ignored = 1; + options->flags = orig_flags; + return ignored; +} + void diff_addremove(struct diff_options *options, int addremove, unsigned mode, const unsigned char *sha1, @@ -4114,7 +4229,7 @@ void diff_addremove(struct diff_options *options, { struct diff_filespec *one, *two; - if (DIFF_OPT_TST(options, IGNORE_SUBMODULES) && S_ISGITLINK(mode)) + if (S_ISGITLINK(mode) && is_submodule_ignored(concatpath, options)) return; /* This may look odd, but it is a preparation for @@ -4161,8 +4276,8 @@ void diff_change(struct diff_options *options, { struct diff_filespec *one, *two; - if (DIFF_OPT_TST(options, IGNORE_SUBMODULES) && S_ISGITLINK(old_mode) - && S_ISGITLINK(new_mode)) + if (S_ISGITLINK(old_mode) && S_ISGITLINK(new_mode) && + is_submodule_ignored(concatpath, options)) return; if (DIFF_OPT_TST(options, REVERSE_DIFF)) { @@ -77,6 +77,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data) #define DIFF_OPT_DIRTY_SUBMODULES (1 << 24) 
#define DIFF_OPT_IGNORE_UNTRACKED_IN_SUBMODULES (1 << 25) #define DIFF_OPT_IGNORE_DIRTY_SUBMODULES (1 << 26) +#define DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG (1 << 27) #define DIFF_OPT_TST(opts, flag) ((opts)->flags & DIFF_OPT_##flag) #define DIFF_OPT_SET(opts, flag) ((opts)->flags |= DIFF_OPT_##flag) @@ -217,6 +218,13 @@ extern void diff_unmerge(struct diff_options *, #define DIFF_SETUP_USE_CACHE 2 #define DIFF_SETUP_USE_SIZE_CACHE 4 +/* + * Poor man's alternative to parse-option, to allow both sticked form + * (--option=value) and separate form (--option value). + */ +extern int parse_long_opt(const char *opt, const char **argv, + const char **optarg); + extern int git_diff_basic_config(const char *var, const char *value, void *cb); extern int git_diff_ui_config(const char *var, const char *value, void *cb); extern int diff_use_color_default; diff --git a/environment.c b/environment.c index 83d38d3c23..eeb26876a1 100644 --- a/environment.c +++ b/environment.c @@ -53,6 +53,7 @@ enum object_creation_mode object_creation_mode = OBJECT_CREATION_MODE; char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; +struct startup_info *startup_info; /* Parallel index stat data preload? */ int core_preload_index = 0; diff --git a/fast-import.c b/fast-import.c index ddad289dae..2317b0fe75 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1528,6 +1528,14 @@ static int tree_content_remove( for (i = 0; i < t->entry_count; i++) { e = t->entries[i]; if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) { + if (slash1 && !S_ISDIR(e->versions[1].mode)) + /* + * If p names a file in some subdirectory, and a + * file or symlink matching the name of the + * parent directory of p exists, then p cannot + * exist and need not be deleted. 
+ */ + return 1; if (!slash1 || !S_ISDIR(e->versions[1].mode)) goto del_entry; if (!e->tree) @@ -2131,6 +2139,7 @@ static void file_change_m(struct branch *b) case S_IFREG | 0644: case S_IFREG | 0755: case S_IFLNK: + case S_IFDIR: case S_IFGITLINK: /* ok */ break; @@ -2176,23 +2185,28 @@ static void file_change_m(struct branch *b) * another repository. */ } else if (inline_data) { + if (S_ISDIR(mode)) + die("Directories cannot be specified 'inline': %s", + command_buf.buf); if (p != uq.buf) { strbuf_addstr(&uq, p); p = uq.buf; } read_next_command(); parse_and_store_blob(&last_blob, sha1, 0); - } else if (oe) { - if (oe->type != OBJ_BLOB) - die("Not a blob (actually a %s): %s", - typename(oe->type), command_buf.buf); } else { - enum object_type type = sha1_object_info(sha1, NULL); + enum object_type expected = S_ISDIR(mode) ? + OBJ_TREE: OBJ_BLOB; + enum object_type type = oe ? oe->type : + sha1_object_info(sha1, NULL); if (type < 0) - die("Blob not found: %s", command_buf.buf); - if (type != OBJ_BLOB) - die("Not a blob (actually a %s): %s", - typename(type), command_buf.buf); + die("%s not found: %s", + S_ISDIR(mode) ? 
"Tree" : "Blob", + command_buf.buf); + if (type != expected) + die("Not a %s (actually a %s): %s", + typename(expected), typename(type), + command_buf.buf); } tree_content_set(&b->branch_tree, p, sha1, mode, NULL); diff --git a/git-compat-util.h b/git-compat-util.h index fe845ae639..877096ecb0 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -312,6 +312,11 @@ extern size_t gitstrlcpy(char *, const char *, size_t); extern uintmax_t gitstrtoumax(const char *, char **, int); #endif +#ifdef NO_STRTOK_R +#define strtok_r gitstrtok_r +extern char *gitstrtok_r(char *s, const char *delim, char **save_ptr); +#endif + #ifdef NO_HSTRERROR #define hstrerror githstrerror extern const char *githstrerror(int herror); diff --git a/git-instaweb.sh b/git-instaweb.sh index b7342e22c8..e6f6ecda17 100755 --- a/git-instaweb.sh +++ b/git-instaweb.sh @@ -57,6 +57,13 @@ resolve_full_httpd () { httpd_only="${httpd%% *}" # cut on first space return ;; + *webrick*) + # server is started by running via generated webrick.rb in + # $fqgitdir/gitweb + full_httpd="$fqgitdir/gitweb/webrick.rb" + httpd_only="${httpd%% *}" # cut on first space + return + ;; esac httpd_only="$(echo $httpd | cut -f1 -d' ')" @@ -188,40 +195,53 @@ GITWEB_CONFIG="$fqgitdir/gitweb/gitweb_config.perl" export GIT_EXEC_PATH GIT_DIR GITWEB_CONFIG webrick_conf () { + # webrick seems to have no way of passing arbitrary environment + # variables to the underlying CGI executable, so we wrap the + # actual gitweb.cgi using a shell script to force it + wrapper="$fqgitdir/gitweb/$httpd/wrapper.sh" + cat > "$wrapper" <<EOF +#!/bin/sh +# we use this shell script wrapper around the real gitweb.cgi since +# there appears to be no other way to pass arbitrary environment variables +# into the CGI process +GIT_EXEC_PATH=$GIT_EXEC_PATH GIT_DIR=$GIT_DIR GITWEB_CONFIG=$GITWEB_CONFIG +export GIT_EXEC_PATH GIT_DIR GITWEB_CONFIG +exec $root/gitweb.cgi +EOF + chmod +x "$wrapper" + + # This assumes _ruby_ is in the user's $PATH. 
that's _one_ + # portable way to run ruby, which could be installed anywhere, really. # generate a standalone server script in $fqgitdir/gitweb. cat >"$fqgitdir/gitweb/$httpd.rb" <<EOF +#!/usr/bin/env ruby require 'webrick' -require 'yaml' -options = YAML::load_file(ARGV[0]) -options[:StartCallback] = proc do - File.open(options[:PidFile],"w") do |f| - f.puts Process.pid - end -end -options[:ServerType] = WEBrick::Daemon +require 'logger' +options = { + :Port => $port, + :DocumentRoot => "$root", + :Logger => Logger.new('$fqgitdir/gitweb/error.log'), + :AccessLog => [ + [ Logger.new('$fqgitdir/gitweb/access.log'), + WEBrick::AccessLog::COMBINED_LOG_FORMAT ] + ], + :DirectoryIndex => ["gitweb.cgi"], + :CGIInterpreter => "$wrapper", + :StartCallback => lambda do + File.open("$fqgitdir/pid", "w") { |f| f.puts Process.pid } + end, + :ServerType => WEBrick::Daemon, +} +options[:BindAddress] = '127.0.0.1' if "$local" == "true" server = WEBrick::HTTPServer.new(options) ['INT', 'TERM'].each do |signal| trap(signal) {server.shutdown} end server.start EOF - # generate a shell script to invoke the above ruby script, - # which assumes _ruby_ is in the user's $PATH. that's _one_ - # portable way to run ruby, which could be installed anywhere, - # really. - cat >"$fqgitdir/gitweb/$httpd" <<EOF -#!/bin/sh -exec ruby "$fqgitdir/gitweb/$httpd.rb" \$* -EOF - chmod +x "$fqgitdir/gitweb/$httpd" - - cat >"$conf" <<EOF -:Port: $port -:DocumentRoot: "$root" -:DirectoryIndex: ["gitweb.cgi"] -:PidFile: "$fqgitdir/pid" -EOF - test "$local" = true && echo ':BindAddress: "127.0.0.1"' >> "$conf" + chmod +x "$fqgitdir/gitweb/$httpd.rb" + # configuration is embedded in server script file, webrick.rb + rm -f "$conf" } lighttpd_conf () { diff --git a/git-mergetool--lib.sh b/git-mergetool--lib.sh index 51dd0d67ba..b5e1943b1d 100644 --- a/git-mergetool--lib.sh +++ b/git-mergetool--lib.sh @@ -35,7 +35,7 @@ check_unchanged () { while true; do echo "$MERGED seems unchanged." 
printf "Was the merge successful? [y/n] " - read answer < /dev/tty + read answer case "$answer" in y*|Y*) status=0; break ;; n*|N*) status=1; break ;; diff --git a/git-mergetool.sh b/git-mergetool.sh index b52a7410bc..2f8dc441c6 100755 --- a/git-mergetool.sh +++ b/git-mergetool.sh @@ -264,24 +264,46 @@ merge_keep_temporaries="$(git config --bool mergetool.keepTemporaries || echo fa last_status=0 rollup_status=0 +rerere=false + +files_to_merge() { + if test "$rerere" = true + then + git rerere status + else + git ls-files -u | sed -e 's/^[^ ]* //' | sort -u + fi +} + if test $# -eq 0 ; then - files=$(git ls-files -u | sed -e 's/^[^ ]* //' | sort -u) + cd_to_toplevel + + if test -e "$GIT_DIR/MERGE_RR" + then + rerere=true + fi + + files=$(files_to_merge) if test -z "$files" ; then echo "No files need merging" exit 0 fi - echo Merging the files: "$files" - git ls-files -u | - sed -e 's/^[^ ]* //' | - sort -u | + + # Save original stdin + exec 3<&0 + + printf "Merging:\n" + printf "$files\n" + + files_to_merge | while IFS= read i do if test $last_status -ne 0; then - prompt_after_failed_merge < /dev/tty || exit 1 + prompt_after_failed_merge <&3 || exit 1 fi printf "\n" - merge_file "$i" < /dev/tty > /dev/tty + merge_file "$i" <&3 last_status=$? 
if test $last_status -ne 0; then rollup_status=1 diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh index b94c2a0386..eb2dff55f8 100755 --- a/git-rebase--interactive.sh +++ b/git-rebase--interactive.sh @@ -111,11 +111,12 @@ VERBOSE= OK_TO_SKIP_PRE_REBASE= REBASE_ROOT= AUTOSQUASH= +test "$(git config --bool rebase.autosquash)" = "true" && AUTOSQUASH=t NEVER_FF= -GIT_CHERRY_PICK_HELP=" After resolving the conflicts, -mark the corrected paths with 'git add <paths>', and -run 'git rebase --continue'" +GIT_CHERRY_PICK_HELP="\ +hint: after resolving the conflicts, mark the corrected paths +hint: with 'git add <paths>' and run 'git rebase --continue'" export GIT_CHERRY_PICK_HELP warn () { @@ -537,6 +538,34 @@ do_next () { esac record_in_rewritten $sha1 ;; + x|"exec") + read -r command rest < "$TODO" + mark_action_done + printf 'Executing: %s\n' "$rest" + # "exec" command doesn't take a sha1 in the todo-list. + # => can't just use $sha1 here. + git rev-parse --verify HEAD > "$DOTEST"/stopped-sha + ${SHELL:-@SHELL_PATH@} -c "$rest" # Actual execution + status=$? + if test "$status" -ne 0 + then + warn "Execution failed: $rest" + warn "You can fix the problem, and then run" + warn + warn " git rebase --continue" + warn + exit "$status" + fi + # Run in subshell because require_clean_work_tree can die. + if ! 
(require_clean_work_tree) + then + warn "Commit or stash your changes, and then run" + warn + warn " git rebase --continue" + warn + exit 1 + fi + ;; *) warn "Unknown command: $command $sha1 $rest" if git rev-parse --verify -q "$sha1" >/dev/null @@ -591,22 +620,30 @@ do_rest () { # skip picking commits whose parents are unchanged skip_unnecessary_picks () { fd=3 - while read -r command sha1 rest + while read -r command rest do # fd=3 means we skip the command - case "$fd,$command,$(git rev-parse --verify --quiet $sha1^)" in - 3,pick,"$ONTO"*|3,p,"$ONTO"*) + case "$fd,$command" in + 3,pick|3,p) # pick a commit whose parent is current $ONTO -> skip - ONTO=$sha1 + sha1=$(printf '%s' "$rest" | cut -d ' ' -f 1) + case "$(git rev-parse --verify --quiet "$sha1"^)" in + "$ONTO"*) + ONTO=$sha1 + ;; + *) + fd=1 + ;; + esac ;; - 3,#*|3,,*) + 3,#*|3,) # copy comments ;; *) fd=1 ;; esac - printf '%s\n' "$command${sha1:+ }$sha1${rest:+ }$rest" >&$fd + printf '%s\n' "$command${rest:+ }$rest" >&$fd done <"$TODO" >"$TODO.new" 3>>"$DONE" && mv -f "$TODO".new "$TODO" && case "$(peek_next_command)" in @@ -795,6 +832,9 @@ first and then run 'git rebase --continue' again." --autosquash) AUTOSQUASH=t ;; + --no-autosquash) + AUTOSQUASH= + ;; --onto) shift ONTO=$(parse_onto "$1") || @@ -957,6 +997,7 @@ first and then run 'git rebase --continue' again." # e, edit = use commit, but stop for amending # s, squash = use commit, but meld into previous commit # f, fixup = like "squash", but discard this commit's log message +# x <cmd>, exec <cmd> = Run a shell command <cmd>, and stop if it fails # # If you remove a line here THAT COMMIT WILL BE LOST. # However, if you remove everything, the rebase will be aborted. diff --git a/git-rebase.sh b/git-rebase.sh index 1b9ea48cd7..7508463b30 100755 --- a/git-rebase.sh +++ b/git-rebase.sh @@ -44,6 +44,7 @@ To restore the original branch and stop rebasing run \"git rebase --abort\". 
" unset newbase strategy=recursive +strategy_opts= do_merge= dotest="$GIT_DIR"/rebase-merge prec=4 @@ -112,7 +113,7 @@ call_merge () { then export GIT_MERGE_VERBOSITY=1 fi - git-merge-$strategy "$cmt^" -- "$hd" "$cmt" + eval 'git-merge-$strategy' $strategy_opts '"$cmt^" -- "$hd" "$cmt"' rv=$? case "$rv" in 0) @@ -294,6 +295,27 @@ do -M|-m|--m|--me|--mer|--merg|--merge) do_merge=t ;; + -X*|--strategy-option*) + case "$#,$1" in + 1,-X|1,--strategy-option) + usage ;; + *,-X|*,--strategy-option) + newopt="$2" + shift ;; + *,--strategy-option=*) + newopt="$(expr " $1" : ' --strategy-option=\(.*\)')" ;; + *,-X*) + newopt="$(expr " $1" : ' -X\(.*\)')" ;; + 1,*) + usage ;; + esac + strategy_opts="$strategy_opts $(git rev-parse --sq-quote "--$newopt")" + do_merge=t + if test -n "$strategy" + then + strategy=recursive + fi + ;; -s=*|--s=*|--st=*|--str=*|--stra=*|--strat=*|--strate=*|\ --strateg=*|--strategy=*|\ -s|--s|--st|--str|--stra|--strat|--strate|--strateg|--strategy) diff --git a/git-stash.sh b/git-stash.sh index 1d95447d03..7ce818bd1b 100755 --- a/git-stash.sh +++ b/git-stash.sh @@ -210,56 +210,146 @@ list_stash () { } show_stash () { - have_stash || die 'No stash found' + assert_stash_like "$@" - flags=$(git rev-parse --no-revs --flags "$@") - if test -z "$flags" - then - flags=--stat - fi - - w_commit=$(git rev-parse --quiet --verify --default $ref_stash "$@") && - b_commit=$(git rev-parse --quiet --verify "$w_commit^") || - die "'$*' is not a stash" - - git diff $flags $b_commit $w_commit + git diff ${FLAGS:---stat} $b_commit $w_commit } -apply_stash () { - applied_stash= - unstash_index= - - while test $# != 0 +# +# Parses the remaining options looking for flags and +# at most one revision defaulting to ${ref_stash}@{0} +# if none found. +# +# Derives related tree and commit objects from the +# revision, if one is found. +# +# stash records the work tree, and is a merge between the +# base commit (first parent) and the index tree (second parent). 
+# +# REV is set to the symbolic version of the specified stash-like commit +# IS_STASH_LIKE is non-blank if ${REV} looks like a stash +# IS_STASH_REF is non-blank if the ${REV} looks like a stash ref +# s is set to the SHA1 of the stash commit +# w_commit is set to the commit containing the working tree +# b_commit is set to the base commit +# i_commit is set to the commit containing the index tree +# w_tree is set to the working tree +# b_tree is set to the base tree +# i_tree is set to the index tree +# +# GIT_QUIET is set to t if -q is specified +# INDEX_OPTION is set to --index if --index is specified. +# FLAGS is set to the remaining flags +# +# dies if: +# * too many revisions specified +# * no revision is specified and there is no stash stack +# * a revision is specified which cannot be resolve to a SHA1 +# * a non-existent stash reference is specified +# + +parse_flags_and_rev() +{ + test "$PARSE_CACHE" = "$*" && return 0 # optimisation + PARSE_CACHE="$*" + + IS_STASH_LIKE= + IS_STASH_REF= + INDEX_OPTION= + s= + w_commit= + b_commit= + i_commit= + w_tree= + b_tree= + i_tree= + + REV=$(git rev-parse --no-flags --symbolic "$@" 2>/dev/null) + FLAGS=$(git rev-parse --no-revs -- "$@" 2>/dev/null) + + set -- $FLAGS + + FLAGS= + while test $# -ne 0 do case "$1" in - --index) - unstash_index=t + -q|--quiet) + GIT_QUIET=-t ;; - -q|--quiet) - GIT_QUIET=t + --index) + INDEX_OPTION=--index ;; - *) - break + --) + : + ;; + *) + FLAGS="${FLAGS}${FLAGS:+ }$1" ;; esac shift done - if test $# = 0 + set -- $REV + + case $# in + 0) + have_stash || die "No stash found." 
+ set -- ${ref_stash}@{0} + ;; + 1) + : + ;; + *) + die "Too many revisions specified: $REV" + ;; + esac + + REV=$(git rev-parse --quiet --symbolic --verify $1 2>/dev/null) || die "$1 is not valid reference" + + i_commit=$(git rev-parse --quiet --verify $REV^2 2>/dev/null) && + set -- $(git rev-parse $REV $REV^1 $REV: $REV^1: $REV^2: 2>/dev/null) && + s=$1 && + w_commit=$1 && + b_commit=$2 && + w_tree=$3 && + b_tree=$4 && + i_tree=$5 && + IS_STASH_LIKE=t && + test "$ref_stash" = "$(git rev-parse --symbolic-full-name "${REV%@*}")" && + IS_STASH_REF=t + + if test "${REV}" != "${REV%{*\}}" then - have_stash || die 'Nothing to apply' - applied_stash="$ref_stash@{0}" - else - applied_stash="$*" + # maintainers: it would be better if git rev-parse indicated + # this condition with a non-zero status code but as of 1.7.2.1 it + # it did not. So, we use non-empty stderr output as a proxy for the + # condition of interest. + test -z "$(git rev-parse "$REV" 2>&1 >/dev/null)" || die "$REV does not exist in the stash log" fi - # stash records the work tree, and is a merge between the - # base commit (first parent) and the index tree (second parent). 
- s=$(git rev-parse --quiet --verify --default $ref_stash "$@") && - w_tree=$(git rev-parse --quiet --verify "$s:") && - b_tree=$(git rev-parse --quiet --verify "$s^1:") && - i_tree=$(git rev-parse --quiet --verify "$s^2:") || - die "$*: no valid stashed state found" +} + +is_stash_like() +{ + parse_flags_and_rev "$@" + test -n "$IS_STASH_LIKE" +} + +assert_stash_like() { + is_stash_like "$@" || die "'$*' is not a stash-like commit" +} + +is_stash_ref() { + is_stash_like "$@" && test -n "$IS_STASH_REF" +} + +assert_stash_ref() { + is_stash_ref "$@" || die "'$*' is not a stash reference" +} + +apply_stash () { + + assert_stash_like "$@" git update-index -q --refresh && git diff-files --quiet --ignore-submodules || @@ -270,7 +360,7 @@ apply_stash () { die 'Cannot apply a stash in the middle of a merge' unstashed_index_tree= - if test -n "$unstash_index" && test "$b_tree" != "$i_tree" && + if test -n "$INDEX_OPTION" && test "$b_tree" != "$i_tree" && test "$c_tree" != "$i_tree" then git diff-tree --binary $s^2^..$s^2 | git apply --cached @@ -315,7 +405,7 @@ apply_stash () { else # Merge conflict; keep the exit status from merge-recursive status=$? - if test -n "$unstash_index" + if test -n "$INDEX_OPTION" then echo >&2 'Index was not unstashed.' 
fi @@ -323,58 +413,38 @@ apply_stash () { fi } -drop_stash () { - have_stash || die 'No stash entries to drop' +pop_stash() { + assert_stash_ref "$@" - while test $# != 0 - do - case "$1" in - -q|--quiet) - GIT_QUIET=t - ;; - *) - break - ;; - esac - shift - done + apply_stash "$@" && + drop_stash "$@" +} - if test $# = 0 - then - set x "$ref_stash@{0}" - shift - fi - # Verify supplied argument looks like a stash entry - s=$(git rev-parse --verify "$@") && - git rev-parse --verify "$s:" > /dev/null 2>&1 && - git rev-parse --verify "$s^1:" > /dev/null 2>&1 && - git rev-parse --verify "$s^2:" > /dev/null 2>&1 || - die "$*: not a valid stashed state" +drop_stash () { + assert_stash_ref "$@" - git reflog delete --updateref --rewrite "$@" && - say "Dropped $* ($s)" || die "$*: Could not drop stash entry" + git reflog delete --updateref --rewrite "${REV}" && + say "Dropped ${REV} ($s)" || die "${REV}: Could not drop stash entry" # clear_stash if we just dropped the last stash entry git rev-parse --verify "$ref_stash@{0}" > /dev/null 2>&1 || clear_stash } apply_to_branch () { - have_stash || die 'Nothing to apply' - test -n "$1" || die 'No branch name specified' branch=$1 + shift 1 - if test -z "$2" - then - set x "$ref_stash@{0}" - fi - stash=$2 + set -- --index "$@" + assert_stash_like "$@" - git checkout -b $branch $stash^ && - apply_stash --index $stash && - drop_stash $stash + git checkout -b $branch $REV^ && + apply_stash "$@" + + test -z "$IS_STASH_REF" || drop_stash "$@" } +PARSE_CACHE='--not-parsed' # The default command is "save" if nothing but options are given seen_non_option= for opt @@ -422,10 +492,7 @@ drop) ;; pop) shift - if apply_stash "$@" - then - drop_stash "$applied_stash" - fi + pop_stash "$@" ;; branch) shift diff --git a/git-svn.perl b/git-svn.perl index c4163584a9..9b046b693f 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -494,6 +494,7 @@ sub cmd_set_tree { sub cmd_dcommit { my $head = shift; + command_noisy(qw/update-index --refresh/); 
git_cmd_try { command_oneline(qw/diff-index --quiet HEAD/) } 'Cannot dcommit with a dirty index. Commit your changes first, ' . "or stash them with `git stash'.\n"; @@ -1819,6 +1820,7 @@ sub read_all_remotes { die("svn-remote.$remote: remote ref '$remote_ref' " . "must start with 'refs/'\n") unless $remote_ref =~ m{^refs/}; + $local_ref = uri_decode($local_ref); $r->{$remote}->{fetch}->{$local_ref} = $remote_ref; $r->{$remote}->{svm} = {} if $use_svm_props; } elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) { @@ -1831,6 +1833,7 @@ sub read_all_remotes { die("svn-remote.$remote: remote ref '$remote_ref' ($t) " . "must start with 'refs/'\n") unless $remote_ref =~ m{^refs/}; + $local_ref = uri_decode($local_ref); my $rs = { t => $t, remote => $remote, @@ -2956,18 +2959,29 @@ sub other_gs { my $gs = Git::SVN->find_by_url($new_url, $url, $branch_from); unless ($gs) { my $ref_id = $old_ref_id; - $ref_id =~ s/\@\d+$//; + $ref_id =~ s/\@\d+-*$//; $ref_id .= "\@$r"; # just grow a tail if we're not unique enough :x $ref_id .= '-' while find_ref($ref_id); - print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1; my ($u, $p, $repo_id) = ($new_url, '', $ref_id); if ($u =~ s#^\Q$url\E(/|$)##) { $p = $u; $u = $url; $repo_id = $self->{repo_id}; } - $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1); + while (1) { + # It is possible to tag two different subdirectories at + # the same revision. If the url for an existing ref + # does not match, we must either find a ref with a + # matching url or create a new ref by growing a tail. 
+ $gs = Git::SVN->init($u, $p, $repo_id, $ref_id, 1); + my (undef, $max_commit) = $gs->rev_map_max(1); + last if (!$max_commit); + my ($url) = ::cmt_metadata($max_commit); + last if ($url eq $gs->full_url); + $ref_id .= '-'; + } + print STDERR "Initializing parent: $ref_id\n" unless $::_q > 1; } $gs } @@ -4050,6 +4064,7 @@ sub new { $self->{absent_dir} = {}; $self->{absent_file} = {}; $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new }); + $self->{pathnameencoding} = Git::config('svn.pathnameencoding'); $self; } @@ -4133,6 +4148,10 @@ sub open_directory { sub git_path { my ($self, $path) = @_; + if (my $enc = $self->{pathnameencoding}) { + require Encode; + Encode::from_to($path, 'UTF-8', $enc); + } if ($self->{path_strip}) { $path =~ s!$self->{path_strip}!! or die "Failed to strip path '$path' ($self->{path_strip})\n"; @@ -4521,6 +4540,10 @@ sub split_path { sub repo_path { my ($self, $path) = @_; + if (my $enc = $self->{pathnameencoding}) { + require Encode; + Encode::from_to($path, $enc, 'UTF-8'); + } $self->{path_prefix}.(defined $path ? 
$path : ''); } @@ -14,6 +14,7 @@ const char git_usage_string[] = const char git_more_info_string[] = "See 'git help COMMAND' for more information on a specific command."; +static struct startup_info git_startup_info; static int use_pager = -1; struct pager_config { const char *cmd; @@ -188,7 +189,8 @@ static int handle_alias(int *argcp, const char ***argv) } count = split_cmdline(alias_string, &new_argv); if (count < 0) - die("Bad alias.%s string", alias_command); + die("Bad alias.%s string: %s", alias_command, + split_cmdline_strerror(count)); option_count = handle_options(&new_argv, &count, &envchanged); if (envchanged) die("alias '%s' changes environment variables\n" @@ -229,13 +231,14 @@ static int handle_alias(int *argcp, const char ***argv) const char git_version_string[] = GIT_VERSION; -#define RUN_SETUP (1<<0) -#define USE_PAGER (1<<1) +#define RUN_SETUP (1<<0) +#define RUN_SETUP_GENTLY (1<<1) +#define USE_PAGER (1<<2) /* * require working tree to be present -- anything uses this needs * RUN_SETUP for reading from the configuration file. 
*/ -#define NEED_WORK_TREE (1<<2) +#define NEED_WORK_TREE (1<<3) struct cmd_struct { const char *cmd; @@ -254,8 +257,12 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) if (!help) { if (p->option & RUN_SETUP) prefix = setup_git_directory(); + if (p->option & RUN_SETUP_GENTLY) { + int nongit_ok; + prefix = setup_git_directory_gently(&nongit_ok); + } - if (use_pager == -1 && p->option & RUN_SETUP) + if (use_pager == -1 && p->option & (RUN_SETUP | RUN_SETUP_GENTLY)) use_pager = check_pager_config(p->cmd); if (use_pager == -1 && p->option & USE_PAGER) use_pager = 1; @@ -295,12 +302,12 @@ static void handle_internal_command(int argc, const char **argv) { "add", cmd_add, RUN_SETUP | NEED_WORK_TREE }, { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, { "annotate", cmd_annotate, RUN_SETUP }, - { "apply", cmd_apply }, + { "apply", cmd_apply, RUN_SETUP_GENTLY }, { "archive", cmd_archive }, { "bisect--helper", cmd_bisect__helper, RUN_SETUP | NEED_WORK_TREE }, { "blame", cmd_blame, RUN_SETUP }, { "branch", cmd_branch, RUN_SETUP }, - { "bundle", cmd_bundle }, + { "bundle", cmd_bundle, RUN_SETUP_GENTLY }, { "cat-file", cmd_cat_file, RUN_SETUP }, { "checkout", cmd_checkout, RUN_SETUP | NEED_WORK_TREE }, { "checkout-index", cmd_checkout_index, @@ -313,7 +320,7 @@ static void handle_internal_command(int argc, const char **argv) { "clean", cmd_clean, RUN_SETUP | NEED_WORK_TREE }, { "commit", cmd_commit, RUN_SETUP | NEED_WORK_TREE }, { "commit-tree", cmd_commit_tree, RUN_SETUP }, - { "config", cmd_config }, + { "config", cmd_config, RUN_SETUP_GENTLY }, { "count-objects", cmd_count_objects, RUN_SETUP }, { "describe", cmd_describe, RUN_SETUP }, { "diff", cmd_diff }, @@ -330,21 +337,21 @@ static void handle_internal_command(int argc, const char **argv) { "fsck-objects", cmd_fsck, RUN_SETUP }, { "gc", cmd_gc, RUN_SETUP }, { "get-tar-commit-id", cmd_get_tar_commit_id }, - { "grep", cmd_grep }, + { "grep", cmd_grep, RUN_SETUP_GENTLY }, { "hash-object", 
cmd_hash_object }, { "help", cmd_help }, - { "index-pack", cmd_index_pack }, + { "index-pack", cmd_index_pack, RUN_SETUP_GENTLY }, { "init", cmd_init_db }, { "init-db", cmd_init_db }, { "log", cmd_log, RUN_SETUP }, { "ls-files", cmd_ls_files, RUN_SETUP }, { "ls-tree", cmd_ls_tree, RUN_SETUP }, - { "ls-remote", cmd_ls_remote }, + { "ls-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, { "mailinfo", cmd_mailinfo }, { "mailsplit", cmd_mailsplit }, { "merge", cmd_merge, RUN_SETUP | NEED_WORK_TREE }, { "merge-base", cmd_merge_base, RUN_SETUP }, - { "merge-file", cmd_merge_file }, + { "merge-file", cmd_merge_file, RUN_SETUP_GENTLY }, { "merge-index", cmd_merge_index, RUN_SETUP }, { "merge-ours", cmd_merge_ours, RUN_SETUP }, { "merge-recursive", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE }, @@ -360,7 +367,7 @@ static void handle_internal_command(int argc, const char **argv) { "pack-objects", cmd_pack_objects, RUN_SETUP }, { "pack-redundant", cmd_pack_redundant, RUN_SETUP }, { "patch-id", cmd_patch_id }, - { "peek-remote", cmd_ls_remote }, + { "peek-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, { "pickaxe", cmd_blame, RUN_SETUP }, { "prune", cmd_prune, RUN_SETUP }, { "prune-packed", cmd_prune_packed, RUN_SETUP }, @@ -370,7 +377,7 @@ static void handle_internal_command(int argc, const char **argv) { "reflog", cmd_reflog, RUN_SETUP }, { "remote", cmd_remote, RUN_SETUP }, { "replace", cmd_replace, RUN_SETUP }, - { "repo-config", cmd_config }, + { "repo-config", cmd_config, RUN_SETUP_GENTLY }, { "rerere", cmd_rerere, RUN_SETUP }, { "reset", cmd_reset, RUN_SETUP }, { "rev-list", cmd_rev_list, RUN_SETUP }, @@ -378,7 +385,7 @@ static void handle_internal_command(int argc, const char **argv) { "revert", cmd_revert, RUN_SETUP | NEED_WORK_TREE }, { "rm", cmd_rm, RUN_SETUP }, { "send-pack", cmd_send_pack, RUN_SETUP }, - { "shortlog", cmd_shortlog, USE_PAGER }, + { "shortlog", cmd_shortlog, RUN_SETUP_GENTLY | USE_PAGER }, { "show-branch", cmd_show_branch, RUN_SETUP }, { "show", cmd_show, 
RUN_SETUP }, { "status", cmd_status, RUN_SETUP | NEED_WORK_TREE }, @@ -392,7 +399,7 @@ static void handle_internal_command(int argc, const char **argv) { "update-ref", cmd_update_ref, RUN_SETUP }, { "update-server-info", cmd_update_server_info, RUN_SETUP }, { "upload-archive", cmd_upload_archive }, - { "var", cmd_var }, + { "var", cmd_var, RUN_SETUP_GENTLY }, { "verify-tag", cmd_verify_tag, RUN_SETUP }, { "version", cmd_version }, { "whatchanged", cmd_whatchanged, RUN_SETUP }, @@ -489,6 +496,8 @@ int main(int argc, const char **argv) { const char *cmd; + startup_info = &git_startup_info; + cmd = git_extract_argv0_path(argv[0]); if (!cmd) cmd = "git-help"; diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index 84261bba34..a85e2f6319 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -232,6 +232,29 @@ our %avatar_size = ( # Leave it undefined (or set to 'undef') to turn off load checking. our $maxload = 300; +# configuration for 'highlight' (http://www.andre-simon.de/) +# match by basename +our %highlight_basename = ( + #'Program' => 'py', + #'Library' => 'py', + 'SConstruct' => 'py', # SCons equivalent of Makefile + 'Makefile' => 'make', +); +# match by extension +our %highlight_ext = ( + # main extensions, defining name of syntax; + # see files in /usr/share/highlight/langDefs/ directory + map { $_ => $_ } + qw(py c cpp rb java css php sh pl js tex bib xml awk bat ini spec tcl), + # alternate extensions, see /etc/highlight/filetypes.conf + 'h' => 'c', + map { $_ => 'cpp' } qw(cxx c++ cc), + map { $_ => 'php' } qw(php3 php4), + map { $_ => 'pl' } qw(perl pm), # perhaps also 'cgi' + 'mak' => 'make', + map { $_ => 'xml' } qw(xhtml html htm), +); + # You define site-wide feature defaults here; override them with # $GITWEB_CONFIG as necessary. 
our %feature = ( @@ -1037,8 +1060,12 @@ sub run_request { reset_timer(); evaluate_uri(); + evaluate_gitweb_config(); check_loadavg(); + # $projectroot and $projects_list might be set in gitweb config file + $projects_list ||= $projectroot; + evaluate_query_params(); evaluate_path_info(); evaluate_and_validate_params(); @@ -1086,12 +1113,8 @@ sub evaluate_argv { sub run { evaluate_argv(); - evaluate_gitweb_config(); evaluate_git_version(); - # $projectroot and $projects_list might be set in gitweb config file - $projects_list ||= $projectroot; - $pre_listen_hook->() if $pre_listen_hook; @@ -1102,7 +1125,7 @@ sub run { run_request(); - $pre_dispatch_hook->() + $post_dispatch_hook->() if $post_dispatch_hook; last REQUEST if ($is_last_request->()); @@ -3316,30 +3339,6 @@ sub blob_contenttype { sub guess_file_syntax { my ($highlight, $mimetype, $file_name) = @_; return undef unless ($highlight && defined $file_name); - - # configuration for 'highlight' (http://www.andre-simon.de/) - # match by basename - my %highlight_basename = ( - #'Program' => 'py', - #'Library' => 'py', - 'SConstruct' => 'py', # SCons equivalent of Makefile - 'Makefile' => 'make', - ); - # match by extension - my %highlight_ext = ( - # main extensions, defining name of syntax; - # see files in /usr/share/highlight/langDefs/ directory - map { $_ => $_ } - qw(py c cpp rb java css php sh pl js tex bib xml awk bat ini spec tcl), - # alternate extensions, see /etc/highlight/filetypes.conf - 'h' => 'c', - map { $_ => 'cpp' } qw(cxx c++ cc), - map { $_ => 'php' } qw(php3 php4), - map { $_ => 'pl' } qw(perl pm), # perhaps also 'cgi' - 'mak' => 'make', - map { $_ => 'xml' } qw(xhtml html htm), - ); - my $basename = basename($file_name, '.in'); return $highlight_basename{$basename} if exists $highlight_basename{$basename}; @@ -5192,15 +5191,15 @@ sub git_summary { } sub git_tag { - my $head = git_get_head_hash($project); - git_header_html(); - git_print_page_nav('','', $head,undef,$head); my %tag = 
parse_tag($hash); if (! %tag) { die_error(404, "Unknown tag object"); } + my $head = git_get_head_hash($project); + git_header_html(); + git_print_page_nav('','', $head,undef,$head); git_print_header_div('commit', esc_html($tag{'name'}), $hash); print "<div class=\"title_text\">\n" . "<table class=\"object_header\">\n" . @@ -8,17 +8,6 @@ /* Internal API */ /* - * Output the next line for a graph. - * This formats the next graph line into the specified strbuf. It is not - * terminated with a newline. - * - * Returns 1 if the line includes the current commit, and 0 otherwise. - * graph_next_line() will return 1 exactly once for each time - * graph_update() is called. - */ -static int graph_next_line(struct git_graph *graph, struct strbuf *sb); - -/* * Output a padding line in the graph. * This is similar to graph_next_line(). However, it is guaranteed to * never print the current commit line. Instead, if the commit line is @@ -73,7 +62,7 @@ enum graph_state { /* * The list of available column colors. 
*/ -static char column_colors[][COLOR_MAXLEN] = { +static const char *column_colors_ansi[] = { GIT_COLOR_RED, GIT_COLOR_GREEN, GIT_COLOR_YELLOW, @@ -86,23 +75,33 @@ static char column_colors[][COLOR_MAXLEN] = { GIT_COLOR_BOLD_BLUE, GIT_COLOR_BOLD_MAGENTA, GIT_COLOR_BOLD_CYAN, + GIT_COLOR_RESET, }; -#define COLUMN_COLORS_MAX (ARRAY_SIZE(column_colors)) +#define COLUMN_COLORS_ANSI_MAX (ARRAY_SIZE(column_colors_ansi) - 1) + +static const char **column_colors; +static unsigned short column_colors_max; -static const char *column_get_color_code(const struct column *c) +void graph_set_column_colors(const char **colors, unsigned short colors_max) { - return column_colors[c->color]; + column_colors = colors; + column_colors_max = colors_max; +} + +static const char *column_get_color_code(unsigned short color) +{ + return column_colors[color]; } static void strbuf_write_column(struct strbuf *sb, const struct column *c, char col_char) { - if (c->color < COLUMN_COLORS_MAX) - strbuf_addstr(sb, column_get_color_code(c)); + if (c->color < column_colors_max) + strbuf_addstr(sb, column_get_color_code(c->color)); strbuf_addch(sb, col_char); - if (c->color < COLUMN_COLORS_MAX) - strbuf_addstr(sb, GIT_COLOR_RESET); + if (c->color < column_colors_max) + strbuf_addstr(sb, column_get_color_code(column_colors_max)); } struct git_graph { @@ -226,6 +225,11 @@ static struct strbuf *diff_output_prefix_callback(struct diff_options *opt, void struct git_graph *graph_init(struct rev_info *opt) { struct git_graph *graph = xmalloc(sizeof(struct git_graph)); + + if (!column_colors) + graph_set_column_colors(column_colors_ansi, + COLUMN_COLORS_ANSI_MAX); + graph->commit = NULL; graph->revs = opt; graph->num_parents = 0; @@ -242,7 +246,7 @@ struct git_graph *graph_init(struct rev_info *opt) * always increment it for the first commit we output. * This way we start at 0 for the first commit. 
*/ - graph->default_column_color = COLUMN_COLORS_MAX - 1; + graph->default_column_color = column_colors_max - 1; /* * Allocate a reasonably large default number of columns @@ -365,7 +369,7 @@ static struct commit_list *first_interesting_parent(struct git_graph *graph) static unsigned short graph_get_current_column_color(const struct git_graph *graph) { if (!DIFF_OPT_TST(&graph->revs->diffopt, COLOR_DIFF)) - return COLUMN_COLORS_MAX; + return column_colors_max; return graph->default_column_color; } @@ -375,7 +379,7 @@ static unsigned short graph_get_current_column_color(const struct git_graph *gra static void graph_increment_column_color(struct git_graph *graph) { graph->default_column_color = (graph->default_column_color + 1) % - COLUMN_COLORS_MAX; + column_colors_max; } static unsigned short graph_find_commit_color(const struct git_graph *graph, @@ -1143,7 +1147,7 @@ static void graph_output_collapsing_line(struct git_graph *graph, struct strbuf graph_update_state(graph, GRAPH_PADDING); } -static int graph_next_line(struct git_graph *graph, struct strbuf *sb) +int graph_next_line(struct git_graph *graph, struct strbuf *sb) { switch (graph->state) { case GRAPH_PADDING: @@ -5,6 +5,23 @@ struct git_graph; /* + * Set up a custom scheme for column colors. + * + * The default column color scheme inserts ANSI color escapes to colorize + * the graph. The various color escapes are stored in an array of strings + * where each entry corresponds to a color, except for the last entry, + * which denotes the escape for resetting the color back to the default. + * When generating the graph, strings from this array are inserted before + * and after the various column characters. + * + * This function allows you to enable a custom array of color escapes. + * The 'colors_max' argument is the index of the last "reset" entry. + * + * This functions must be called BEFORE graph_init() is called. 
+ */ +void graph_set_column_colors(const char **colors, unsigned short colors_max); + +/* * Create a new struct git_graph. */ struct git_graph *graph_init(struct rev_info *opt); @@ -32,6 +49,17 @@ void graph_update(struct git_graph *graph, struct commit *commit); */ int graph_is_commit_finished(struct git_graph const *graph); +/* + * Output the next line for a graph. + * This formats the next graph line into the specified strbuf. It is not + * terminated with a newline. + * + * Returns 1 if the line includes the current commit, and 0 otherwise. + * graph_next_line() will return 1 exactly once for each time + * graph_update() is called. + */ +int graph_next_line(struct git_graph *graph, struct strbuf *sb); + /* * graph_show_*: helper functions for printing to stdout @@ -41,6 +41,7 @@ static long curl_low_speed_time = -1; static int curl_ftp_no_epsv; static const char *curl_http_proxy; static char *user_name, *user_pass; +static const char *user_agent; #if LIBCURL_VERSION_NUM >= 0x071700 /* Use CURLOPT_KEYPASSWD as is */ @@ -196,6 +197,9 @@ static int http_options(const char *var, const char *value, void *cb) return 0; } + if (!strcmp("http.useragent", var)) + return git_config_string(&user_agent, var, value); + /* Fall back on the default ones */ return git_default_config(var, value, cb); } @@ -279,7 +283,8 @@ static CURL *get_curl_handle(void) if (getenv("GIT_CURL_VERBOSE")) curl_easy_setopt(result, CURLOPT_VERBOSE, 1); - curl_easy_setopt(result, CURLOPT_USERAGENT, GIT_USER_AGENT); + curl_easy_setopt(result, CURLOPT_USERAGENT, + user_agent ? 
user_agent : GIT_HTTP_USER_AGENT); if (curl_ftp_no_epsv) curl_easy_setopt(result, CURLOPT_FTP_USE_EPSV, 0); @@ -380,6 +385,8 @@ void http_init(struct remote *remote) #endif set_from_env(&ssl_cainfo, "GIT_SSL_CAINFO"); + set_from_env(&user_agent, "GIT_HTTP_USER_AGENT"); + low_speed_limit = getenv("GIT_HTTP_LOW_SPEED_LIMIT"); if (low_speed_limit != NULL) curl_low_speed_limit = strtol(low_speed_limit, NULL, 10); diff --git a/ll-merge.c b/ll-merge.c index 3764a1ab72..6bb3095c3a 100644 --- a/ll-merge.c +++ b/ll-merge.c @@ -46,7 +46,7 @@ static int ll_binary_merge(const struct ll_merge_driver *drv_unused, * or common ancestor for an internal merge. Still return * "conflicted merge" status. */ - mmfile_t *stolen = (flag & 01) ? orig : src1; + mmfile_t *stolen = (flag & LL_OPT_VIRTUAL_ANCESTOR) ? orig : src1; result->ptr = stolen->ptr; result->size = stolen->size; @@ -79,7 +79,7 @@ static int ll_xdl_merge(const struct ll_merge_driver *drv_unused, memset(&xmp, 0, sizeof(xmp)); xmp.level = XDL_MERGE_ZEALOUS; - xmp.favor= (flag >> 1) & 03; + xmp.favor = ll_opt_favor(flag); if (git_xmerge_style >= 0) xmp.style = git_xmerge_style; if (marker_size > 0) @@ -99,7 +99,8 @@ static int ll_union_merge(const struct ll_merge_driver *drv_unused, int flag, int marker_size) { /* Use union favor */ - flag = (flag & 1) | (XDL_MERGE_FAVOR_UNION << 1); + flag &= ~LL_OPT_FAVOR_MASK; + flag |= create_ll_flag(XDL_MERGE_FAVOR_UNION); return ll_xdl_merge(drv_unused, result, path_unused, orig, NULL, src1, NULL, src2, NULL, flag, marker_size); @@ -321,6 +322,16 @@ static int git_path_check_merge(const char *path, struct git_attr_check check[2] return git_checkattr(path, 2, check); } +static void normalize_file(mmfile_t *mm, const char *path) +{ + struct strbuf strbuf = STRBUF_INIT; + if (renormalize_buffer(path, mm->ptr, mm->size, &strbuf)) { + free(mm->ptr); + mm->size = strbuf.len; + mm->ptr = strbuf_detach(&strbuf, NULL); + } +} + int ll_merge(mmbuffer_t *result_buf, const char *path, mmfile_t 
*ancestor, const char *ancestor_label, @@ -332,8 +343,13 @@ int ll_merge(mmbuffer_t *result_buf, const char *ll_driver_name = NULL; int marker_size = DEFAULT_CONFLICT_MARKER_SIZE; const struct ll_merge_driver *driver; - int virtual_ancestor = flag & 01; + int virtual_ancestor = flag & LL_OPT_VIRTUAL_ANCESTOR; + if (flag & LL_OPT_RENORMALIZE) { + normalize_file(ancestor, path); + normalize_file(ours, path); + normalize_file(theirs, path); + } if (!git_path_check_merge(path, check)) { ll_driver_name = check[0].value; if (check[1].value) { diff --git a/ll-merge.h b/ll-merge.h index 57754cc8ca..ff7ca87bfa 100644 --- a/ll-merge.h +++ b/ll-merge.h @@ -5,6 +5,21 @@ #ifndef LL_MERGE_H #define LL_MERGE_H +#define LL_OPT_VIRTUAL_ANCESTOR (1 << 0) +#define LL_OPT_FAVOR_MASK ((1 << 1) | (1 << 2)) +#define LL_OPT_FAVOR_SHIFT 1 +#define LL_OPT_RENORMALIZE (1 << 3) + +static inline int ll_opt_favor(int flag) +{ + return (flag & LL_OPT_FAVOR_MASK) >> LL_OPT_FAVOR_SHIFT; +} + +static inline int create_ll_flag(int favor) +{ + return ((favor << LL_OPT_FAVOR_SHIFT) & LL_OPT_FAVOR_MASK); +} + int ll_merge(mmbuffer_t *result_buf, const char *path, mmfile_t *ancestor, const char *ancestor_label, diff --git a/merge-recursive.c b/merge-recursive.c index fb6aa4a551..20e1779428 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -20,6 +20,7 @@ #include "attr.h" #include "merge-recursive.h" #include "dir.h" +#include "submodule.h" static struct tree *shift_tree_object(struct tree *one, struct tree *two, const char *subtree_shift) @@ -136,16 +137,10 @@ static void output_commit_title(struct merge_options *o, struct commit *commit) if (parse_commit(commit) != 0) printf("(bad commit)\n"); else { - const char *s; - int len; - for (s = commit->buffer; *s; s++) - if (*s == '\n' && s[1] == '\n') { - s += 2; - break; - } - for (len = 0; s[len] && '\n' != s[len]; len++) - ; /* do nothing */ - printf("%.*s\n", len, s); + const char *title; + int len = find_commit_subject(commit->buffer, &title); 
+ if (len) + printf("%.*s\n", len, title); } } } @@ -185,7 +180,7 @@ static int git_merge_trees(int index_only, opts.fn = threeway_merge; opts.src_index = &the_index; opts.dst_index = &the_index; - opts.msgs = get_porcelain_error_msgs(); + setup_unpack_trees_porcelain(&opts, "merge"); init_tree_desc_from_tree(t+0, common); init_tree_desc_from_tree(t+1, head); @@ -525,13 +520,15 @@ static void update_file_flags(struct merge_options *o, void *buf; unsigned long size; - if (S_ISGITLINK(mode)) + if (S_ISGITLINK(mode)) { /* * We may later decide to recursively descend into * the submodule directory and update its index * and/or work tree, but we do not do that now. */ + update_wd = 0; goto update_index; + } buf = read_sha1_file(sha, &type, &size); if (!buf) @@ -647,7 +644,9 @@ static int merge_3way(struct merge_options *o, merge_status = ll_merge(result_buf, a->path, &orig, base_name, &src1, name1, &src2, name2, - (!!o->call_depth) | (favor << 1)); + ((o->call_depth ? LL_OPT_VIRTUAL_ANCESTOR : 0) | + (o->renormalize ? 
LL_OPT_RENORMALIZE : 0) | + create_ll_flag(favor))); free(name1); free(name2); @@ -716,8 +715,8 @@ static struct merge_file_info merge_file(struct merge_options *o, free(result_buf.ptr); result.clean = (merge_status == 0); } else if (S_ISGITLINK(a->mode)) { - result.clean = 0; - hashcpy(result.sha, a->sha1); + result.clean = merge_submodule(result.sha, one->path, one->sha1, + a->sha1, b->sha1); } else if (S_ISLNK(a->mode)) { hashcpy(result.sha, a->sha1); @@ -806,7 +805,8 @@ static int process_renames(struct merge_options *o, struct string_list *b_renames) { int clean_merge = 1, i, j; - struct string_list a_by_dst = {NULL, 0, 0, 0}, b_by_dst = {NULL, 0, 0, 0}; + struct string_list a_by_dst = STRING_LIST_INIT_NODUP; + struct string_list b_by_dst = STRING_LIST_INIT_NODUP; const struct rename *sre; for (i = 0; i < a_renames->nr; i++) { @@ -1019,14 +1019,22 @@ static int process_renames(struct merge_options *o, if (mfi.clean && sha_eq(mfi.sha, ren1->pair->two->sha1) && - mfi.mode == ren1->pair->two->mode) + mfi.mode == ren1->pair->two->mode) { /* - * This messaged is part of + * This message is part of * t6022 test. If you change * it update the test too. */ output(o, 3, "Skipped %s (merged same as existing)", ren1_dst); - else { + + /* There may be higher stage entries left + * in the index (e.g. due to a D/F + * conflict) that need to be resolved. + */ + if (!ren1->dst_entry->stages[2].mode != + !ren1->dst_entry->stages[3].mode) + ren1->dst_entry->processed = 0; + } else { if (mfi.merge || !mfi.clean) output(o, 1, "Renaming %s => %s", ren1_src, ren1_dst); if (mfi.merge) @@ -1056,6 +1064,53 @@ static unsigned char *stage_sha(const unsigned char *sha, unsigned mode) return (is_null_sha1(sha) || mode == 0) ? 
NULL: (unsigned char *)sha; } +static int read_sha1_strbuf(const unsigned char *sha1, struct strbuf *dst) +{ + void *buf; + enum object_type type; + unsigned long size; + buf = read_sha1_file(sha1, &type, &size); + if (!buf) + return error("cannot read object %s", sha1_to_hex(sha1)); + if (type != OBJ_BLOB) { + free(buf); + return error("object %s is not a blob", sha1_to_hex(sha1)); + } + strbuf_attach(dst, buf, size, size + 1); + return 0; +} + +static int blob_unchanged(const unsigned char *o_sha, + const unsigned char *a_sha, + int renormalize, const char *path) +{ + struct strbuf o = STRBUF_INIT; + struct strbuf a = STRBUF_INIT; + int ret = 0; /* assume changed for safety */ + + if (sha_eq(o_sha, a_sha)) + return 1; + if (!renormalize) + return 0; + + assert(o_sha && a_sha); + if (read_sha1_strbuf(o_sha, &o) || read_sha1_strbuf(a_sha, &a)) + goto error_return; + /* + * Note: binary | is used so that both renormalizations are + * performed. Comparison can be skipped if both files are + * unchanged since their sha1s have already been compared. 
+ */ + if (renormalize_buffer(path, o.buf, o.len, &o) | + renormalize_buffer(path, a.buf, o.len, &a)) + ret = (o.len == a.len && !memcmp(o.buf, a.buf, o.len)); + +error_return: + strbuf_release(&o); + strbuf_release(&a); + return ret; +} + /* Per entry merge function */ static int process_entry(struct merge_options *o, const char *path, struct stage_data *entry) @@ -1065,6 +1120,7 @@ static int process_entry(struct merge_options *o, print_index_entry("\tpath: ", entry); */ int clean_merge = 1; + int normalize = o->renormalize; unsigned o_mode = entry->stages[1].mode; unsigned a_mode = entry->stages[2].mode; unsigned b_mode = entry->stages[3].mode; @@ -1072,11 +1128,12 @@ static int process_entry(struct merge_options *o, unsigned char *a_sha = stage_sha(entry->stages[2].sha, a_mode); unsigned char *b_sha = stage_sha(entry->stages[3].sha, b_mode); + entry->processed = 1; if (o_sha && (!a_sha || !b_sha)) { /* Case A: Deleted in one */ if ((!a_sha && !b_sha) || - (sha_eq(a_sha, o_sha) && !b_sha) || - (!a_sha && sha_eq(b_sha, o_sha))) { + (!b_sha && blob_unchanged(o_sha, a_sha, normalize, path)) || + (!a_sha && blob_unchanged(o_sha, b_sha, normalize, path))) { /* Deleted in both or deleted in one and * unchanged in the other */ if (a_sha) @@ -1104,33 +1161,28 @@ static int process_entry(struct merge_options *o, } else if ((!o_sha && a_sha && !b_sha) || (!o_sha && !a_sha && b_sha)) { /* Case B: Added in one. 
*/ - const char *add_branch; - const char *other_branch; unsigned mode; const unsigned char *sha; - const char *conf; if (a_sha) { - add_branch = o->branch1; - other_branch = o->branch2; mode = a_mode; sha = a_sha; - conf = "file/directory"; } else { - add_branch = o->branch2; - other_branch = o->branch1; mode = b_mode; sha = b_sha; - conf = "directory/file"; } if (string_list_has_string(&o->current_directory_set, path)) { - const char *new_path = unique_path(o, path, add_branch); - clean_merge = 0; - output(o, 1, "CONFLICT (%s): There is a directory with name %s in %s. " - "Adding %s as %s", - conf, path, other_branch, path, new_path); - remove_file(o, 0, path, 0); - update_file(o, 0, sha, mode, new_path); + /* Handle D->F conflicts after all subfiles */ + entry->processed = 0; + /* But get any file out of the way now, so conflicted + * entries below the directory of the same name can + * be put in the working directory. + */ + if (a_sha) + output(o, 2, "Removing %s", path); + /* do not touch working file if it did not exist */ + remove_file(o, 0, path, !a_sha); + return 1; /* Assume clean till processed */ } else { output(o, 2, "Adding %s", path); update_file(o, 1, sha, mode, path); @@ -1178,26 +1230,62 @@ static int process_entry(struct merge_options *o, return clean_merge; } -struct unpack_trees_error_msgs get_porcelain_error_msgs(void) +/* + * Per entry merge function for D/F conflicts, to be called only after + * all files below dir have been processed. We do this because in the + * cases we can cleanly resolve D/F conflicts, process_entry() can clean + * out all the files below the directory for us. + */ +static int process_df_entry(struct merge_options *o, + const char *path, struct stage_data *entry) { - struct unpack_trees_error_msgs msgs = { - /* would_overwrite */ - "Your local changes to '%s' would be overwritten by merge. Aborting.", - /* not_uptodate_file */ - "Your local changes to '%s' would be overwritten by merge. 
Aborting.", - /* not_uptodate_dir */ - "Updating '%s' would lose untracked files in it. Aborting.", - /* would_lose_untracked */ - "Untracked working tree file '%s' would be %s by merge. Aborting", - /* bind_overlap -- will not happen here */ - NULL, - }; - if (advice_commit_before_merge) { - msgs.would_overwrite = msgs.not_uptodate_file = - "Your local changes to '%s' would be overwritten by merge. Aborting.\n" - "Please, commit your changes or stash them before you can merge."; + int clean_merge = 1; + unsigned o_mode = entry->stages[1].mode; + unsigned a_mode = entry->stages[2].mode; + unsigned b_mode = entry->stages[3].mode; + unsigned char *o_sha = stage_sha(entry->stages[1].sha, o_mode); + unsigned char *a_sha = stage_sha(entry->stages[2].sha, a_mode); + unsigned char *b_sha = stage_sha(entry->stages[3].sha, b_mode); + const char *add_branch; + const char *other_branch; + unsigned mode; + const unsigned char *sha; + const char *conf; + struct stat st; + + /* We currently only handle D->F cases */ + assert((!o_sha && a_sha && !b_sha) || + (!o_sha && !a_sha && b_sha)); + + entry->processed = 1; + + if (a_sha) { + add_branch = o->branch1; + other_branch = o->branch2; + mode = a_mode; + sha = a_sha; + conf = "file/directory"; + } else { + add_branch = o->branch2; + other_branch = o->branch1; + mode = b_mode; + sha = b_sha; + conf = "directory/file"; } - return msgs; + if (lstat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + const char *new_path = unique_path(o, path, add_branch); + clean_merge = 0; + output(o, 1, "CONFLICT (%s): There is a directory with name %s in %s. 
" + "Adding %s as %s", + conf, path, other_branch, path, new_path); + remove_file(o, 0, path, 0); + update_file(o, 0, sha, mode, new_path); + } else { + output(o, 2, "Adding %s", path); + update_file(o, 1, sha, mode, path); + } + + return clean_merge; } int merge_trees(struct merge_options *o, @@ -1249,6 +1337,13 @@ int merge_trees(struct merge_options *o, && !process_entry(o, path, e)) clean = 0; } + for (i = 0; i < entries->nr; i++) { + const char *path = entries->items[i].string; + struct stage_data *e = entries->items[i].util; + if (!e->processed + && !process_df_entry(o, path, e)) + clean = 0; + } string_list_clear(re_merge, 0); string_list_clear(re_head, 0); @@ -1436,6 +1531,7 @@ void init_merge_options(struct merge_options *o) o->buffer_output = 1; o->diff_rename_limit = -1; o->merge_rename_limit = -1; + o->renormalize = 0; git_config(merge_recursive_config, o); if (getenv("GIT_MERGE_VERBOSITY")) o->verbosity = diff --git a/merge-recursive.h b/merge-recursive.h index b831293b38..34492dbd6e 100644 --- a/merge-recursive.h +++ b/merge-recursive.h @@ -14,6 +14,7 @@ struct merge_options { } recursive_variant; const char *subtree_shift; unsigned buffer_output : 1; + unsigned renormalize : 1; int verbosity; int diff_rename_limit; int merge_rename_limit; @@ -23,9 +24,6 @@ struct merge_options { struct string_list current_directory_set; }; -/* Return a list of user-friendly error messages to be used by merge */ -struct unpack_trees_error_msgs get_porcelain_error_msgs(void); - /* merge_trees() but with recursive ancestor consolidation */ int merge_recursive(struct merge_options *o, struct commit *h1, @@ -877,14 +877,6 @@ void string_list_add_refs_from_colon_sep(struct string_list *list, strbuf_release(&globbuf); } -static int string_list_add_refs_from_list(struct string_list_item *item, - void *cb) -{ - struct string_list *list = cb; - string_list_add_refs_by_glob(list, item->string); - return 0; -} - static int notes_display_config(const char *k, const char *v, void 
*cb) { int *load_refs = cb; @@ -947,30 +939,18 @@ void init_notes(struct notes_tree *t, const char *notes_ref, load_subtree(t, &root_tree, t->root, 0); } -struct load_notes_cb_data { - int counter; - struct notes_tree **trees; -}; - -static int load_one_display_note_ref(struct string_list_item *item, - void *cb_data) -{ - struct load_notes_cb_data *c = cb_data; - struct notes_tree *t = xcalloc(1, sizeof(struct notes_tree)); - init_notes(t, item->string, combine_notes_ignore, 0); - c->trees[c->counter++] = t; - return 0; -} - struct notes_tree **load_notes_trees(struct string_list *refs) { + struct string_list_item *item; + int counter = 0; struct notes_tree **trees; - struct load_notes_cb_data cb_data; trees = xmalloc((refs->nr+1) * sizeof(struct notes_tree *)); - cb_data.counter = 0; - cb_data.trees = trees; - for_each_string_list(refs, load_one_display_note_ref, &cb_data); - trees[cb_data.counter] = NULL; + for_each_string_list_item(item, refs) { + struct notes_tree *t = xcalloc(1, sizeof(struct notes_tree)); + init_notes(t, item->string, combine_notes_ignore, 0); + trees[counter++] = t; + } + trees[counter] = NULL; return trees; } @@ -995,10 +975,12 @@ void init_display_notes(struct display_notes_opt *opt) git_config(notes_display_config, &load_config_refs); - if (opt && opt->extra_notes_refs) - for_each_string_list(opt->extra_notes_refs, - string_list_add_refs_from_list, - &display_notes_refs); + if (opt && opt->extra_notes_refs) { + struct string_list_item *item; + for_each_string_list_item(item, opt->extra_notes_refs) + string_list_add_refs_by_glob(&display_notes_refs, + item->string); + } display_notes_trees = load_notes_trees(&display_notes_refs); string_list_clear(&display_notes_refs, 0); @@ -199,7 +199,7 @@ struct object *parse_object(const unsigned char *sha1) return NULL; } - obj = parse_object_buffer(repl, type, size, buffer, &eaten); + obj = parse_object_buffer(sha1, type, size, buffer, &eaten); if (!eaten) free(buffer); return obj; @@ -21,6 +21,8 @@ 
struct object_array { } *objects; }; +#define OBJECT_ARRAY_INIT { 0, 0, NULL } + #define TYPE_BITS 3 #define FLAG_BITS 27 @@ -122,6 +122,44 @@ char *git_path(const char *fmt, ...) return cleanup_path(pathname); } +char *git_path_submodule(const char *path, const char *fmt, ...) +{ + char *pathname = get_pathname(); + struct strbuf buf = STRBUF_INIT; + const char *git_dir; + va_list args; + unsigned len; + + len = strlen(path); + if (len > PATH_MAX-100) + return bad_path; + + strbuf_addstr(&buf, path); + if (len && path[len-1] != '/') + strbuf_addch(&buf, '/'); + strbuf_addstr(&buf, ".git"); + + git_dir = read_gitfile_gently(buf.buf); + if (git_dir) { + strbuf_reset(&buf); + strbuf_addstr(&buf, git_dir); + } + strbuf_addch(&buf, '/'); + + if (buf.len >= PATH_MAX) + return bad_path; + memcpy(pathname, buf.buf, buf.len + 1); + + strbuf_release(&buf); + len = strlen(pathname); + + va_start(args, fmt); + len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args); + va_end(args); + if (len >= PATH_MAX) + return bad_path; + return cleanup_path(pathname); +} /* git_mkstemp() - create tmp file honoring TMPDIR variable */ int git_mkstemp(char *path, size_t len, const char *template) diff --git a/perl/Makefile b/perl/Makefile index 4ab21d61b8..a2ffb6402d 100644 --- a/perl/Makefile +++ b/perl/Makefile @@ -38,7 +38,7 @@ $(makfile): ../GIT-CFLAGS Makefile echo ' echo $(instdir_SQ)' >> $@ else $(makfile): Makefile.PL ../GIT-CFLAGS - $(PERL_PATH) $< PREFIX='$(prefix_SQ)' + $(PERL_PATH) $< PREFIX='$(prefix_SQ)' INSTALL_BASE='' endif # this is just added comfort for calling make directly in perl dir diff --git a/reachable.c b/reachable.c index b515fa2de3..a03fabf060 100644 --- a/reachable.c +++ b/reachable.c @@ -90,7 +90,7 @@ static void walk_commit_list(struct rev_info *revs) { int i; struct commit *commit; - struct object_array objects = { 0, 0, NULL }; + struct object_array objects = OBJECT_ARRAY_INIT; /* Walk all commits, process their trees */ while ((commit = 
get_revision(revs)) != NULL) @@ -157,7 +157,7 @@ static struct cached_refs { char did_packed; struct ref_list *loose; struct ref_list *packed; -} cached_refs; +} cached_refs, submodule_refs; static struct ref_list *current_ref; static struct ref_list *extra_refs; @@ -229,23 +229,45 @@ void clear_extra_refs(void) extra_refs = NULL; } -static struct ref_list *get_packed_refs(void) +static struct ref_list *get_packed_refs(const char *submodule) { - if (!cached_refs.did_packed) { - FILE *f = fopen(git_path("packed-refs"), "r"); - cached_refs.packed = NULL; + const char *packed_refs_file; + struct cached_refs *refs; + + if (submodule) { + packed_refs_file = git_path_submodule(submodule, "packed-refs"); + refs = &submodule_refs; + free_ref_list(refs->packed); + } else { + packed_refs_file = git_path("packed-refs"); + refs = &cached_refs; + } + + if (!refs->did_packed || submodule) { + FILE *f = fopen(packed_refs_file, "r"); + refs->packed = NULL; if (f) { - read_packed_refs(f, &cached_refs); + read_packed_refs(f, refs); fclose(f); } - cached_refs.did_packed = 1; + refs->did_packed = 1; } - return cached_refs.packed; + return refs->packed; } -static struct ref_list *get_ref_dir(const char *base, struct ref_list *list) +static struct ref_list *get_ref_dir(const char *submodule, const char *base, + struct ref_list *list) { - DIR *dir = opendir(git_path("%s", base)); + DIR *dir; + const char *path; + + if (submodule) + path = git_path_submodule(submodule, "%s", base); + else + path = git_path("%s", base); + + + dir = opendir(path); if (dir) { struct dirent *de; @@ -261,6 +283,7 @@ static struct ref_list *get_ref_dir(const char *base, struct ref_list *list) struct stat st; int flag; int namelen; + const char *refdir; if (de->d_name[0] == '.') continue; @@ -270,16 +293,27 @@ static struct ref_list *get_ref_dir(const char *base, struct ref_list *list) if (has_extension(de->d_name, ".lock")) continue; memcpy(ref + baselen, de->d_name, namelen+1); - if (stat(git_path("%s", ref), 
&st) < 0) + refdir = submodule + ? git_path_submodule(submodule, "%s", ref) + : git_path("%s", ref); + if (stat(refdir, &st) < 0) continue; if (S_ISDIR(st.st_mode)) { - list = get_ref_dir(ref, list); + list = get_ref_dir(submodule, ref, list); continue; } - if (!resolve_ref(ref, sha1, 1, &flag)) { + if (submodule) { hashclr(sha1); - flag |= REF_BROKEN; - } + flag = 0; + if (resolve_gitlink_ref(submodule, ref, sha1) < 0) { + hashclr(sha1); + flag |= REF_BROKEN; + } + } else + if (!resolve_ref(ref, sha1, 1, &flag)) { + hashclr(sha1); + flag |= REF_BROKEN; + } list = add_ref(ref, sha1, flag, list, NULL); } free(ref); @@ -322,10 +356,16 @@ void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname) for_each_rawref(warn_if_dangling_symref, &data); } -static struct ref_list *get_loose_refs(void) +static struct ref_list *get_loose_refs(const char *submodule) { + if (submodule) { + free_ref_list(submodule_refs.loose); + submodule_refs.loose = get_ref_dir(submodule, "refs", NULL); + return submodule_refs.loose; + } + if (!cached_refs.did_loose) { - cached_refs.loose = get_ref_dir("refs", NULL); + cached_refs.loose = get_ref_dir(NULL, "refs", NULL); cached_refs.did_loose = 1; } return cached_refs.loose; @@ -459,7 +499,7 @@ const char *resolve_ref(const char *ref, unsigned char *sha1, int reading, int * git_snpath(path, sizeof(path), "%s", ref); /* Special case: non-existing file. 
*/ if (lstat(path, &st) < 0) { - struct ref_list *list = get_packed_refs(); + struct ref_list *list = get_packed_refs(NULL); while (list) { if (!strcmp(ref, list->name)) { hashcpy(sha1, list->sha1); @@ -588,7 +628,7 @@ int peel_ref(const char *ref, unsigned char *sha1) return -1; if ((flag & REF_ISPACKED)) { - struct ref_list *list = get_packed_refs(); + struct ref_list *list = get_packed_refs(NULL); while (list) { if (!strcmp(list->name, ref)) { @@ -615,12 +655,12 @@ fallback: return -1; } -static int do_for_each_ref(const char *base, each_ref_fn fn, int trim, - int flags, void *cb_data) +static int do_for_each_ref(const char *submodule, const char *base, each_ref_fn fn, + int trim, int flags, void *cb_data) { int retval = 0; - struct ref_list *packed = get_packed_refs(); - struct ref_list *loose = get_loose_refs(); + struct ref_list *packed = get_packed_refs(submodule); + struct ref_list *loose = get_loose_refs(submodule); struct ref_list *extra; @@ -657,24 +697,54 @@ end_each: return retval; } -int head_ref(each_ref_fn fn, void *cb_data) + +static int do_head_ref(const char *submodule, each_ref_fn fn, void *cb_data) { unsigned char sha1[20]; int flag; + if (submodule) { + if (resolve_gitlink_ref(submodule, "HEAD", sha1) == 0) + return fn("HEAD", sha1, 0, cb_data); + + return 0; + } + if (resolve_ref("HEAD", sha1, 1, &flag)) return fn("HEAD", sha1, flag, cb_data); + return 0; } +int head_ref(each_ref_fn fn, void *cb_data) +{ + return do_head_ref(NULL, fn, cb_data); +} + +int head_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return do_head_ref(submodule, fn, cb_data); +} + int for_each_ref(each_ref_fn fn, void *cb_data) { - return do_for_each_ref("refs/", fn, 0, 0, cb_data); + return do_for_each_ref(NULL, "refs/", fn, 0, 0, cb_data); +} + +int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(submodule, "refs/", fn, 0, 0, cb_data); } int for_each_ref_in(const char *prefix, 
each_ref_fn fn, void *cb_data) { - return do_for_each_ref(prefix, fn, strlen(prefix), 0, cb_data); + return do_for_each_ref(NULL, prefix, fn, strlen(prefix), 0, cb_data); +} + +int for_each_ref_in_submodule(const char *submodule, const char *prefix, + each_ref_fn fn, void *cb_data) +{ + return do_for_each_ref(submodule, prefix, fn, strlen(prefix), 0, cb_data); } int for_each_tag_ref(each_ref_fn fn, void *cb_data) @@ -682,19 +752,34 @@ int for_each_tag_ref(each_ref_fn fn, void *cb_data) return for_each_ref_in("refs/tags/", fn, cb_data); } +int for_each_tag_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return for_each_ref_in_submodule(submodule, "refs/tags/", fn, cb_data); +} + int for_each_branch_ref(each_ref_fn fn, void *cb_data) { return for_each_ref_in("refs/heads/", fn, cb_data); } +int for_each_branch_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return for_each_ref_in_submodule(submodule, "refs/heads/", fn, cb_data); +} + int for_each_remote_ref(each_ref_fn fn, void *cb_data) { return for_each_ref_in("refs/remotes/", fn, cb_data); } +int for_each_remote_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data) +{ + return for_each_ref_in_submodule(submodule, "refs/remotes/", fn, cb_data); +} + int for_each_replace_ref(each_ref_fn fn, void *cb_data) { - return do_for_each_ref("refs/replace/", fn, 13, 0, cb_data); + return do_for_each_ref(NULL, "refs/replace/", fn, 13, 0, cb_data); } int for_each_glob_ref_in(each_ref_fn fn, const char *pattern, @@ -734,7 +819,7 @@ int for_each_glob_ref(each_ref_fn fn, const char *pattern, void *cb_data) int for_each_rawref(each_ref_fn fn, void *cb_data) { - return do_for_each_ref("refs/", fn, 0, + return do_for_each_ref(NULL, "refs/", fn, 0, DO_FOR_EACH_INCLUDE_BROKEN, cb_data); } @@ -958,7 +1043,7 @@ static struct ref_lock *lock_ref_sha1_basic(const char *ref, const unsigned char * name is a proper prefix of our refname. 
*/ if (missing && - !is_refname_available(ref, NULL, get_packed_refs(), 0)) { + !is_refname_available(ref, NULL, get_packed_refs(NULL), 0)) { last_errno = ENOTDIR; goto error_return; } @@ -1021,7 +1106,7 @@ static int repack_without_ref(const char *refname) int fd; int found = 0; - packed_ref_list = get_packed_refs(); + packed_ref_list = get_packed_refs(NULL); for (list = packed_ref_list; list; list = list->next) { if (!strcmp(refname, list->name)) { found = 1; @@ -1119,10 +1204,10 @@ int rename_ref(const char *oldref, const char *newref, const char *logmsg) if (!symref) return error("refname %s not found", oldref); - if (!is_refname_available(newref, oldref, get_packed_refs(), 0)) + if (!is_refname_available(newref, oldref, get_packed_refs(NULL), 0)) return 1; - if (!is_refname_available(newref, oldref, get_loose_refs(), 0)) + if (!is_refname_available(newref, oldref, get_loose_refs(NULL), 0)) return 1; lock = lock_ref_sha1_basic(renamed_ref, NULL, 0, NULL); @@ -28,6 +28,14 @@ extern int for_each_replace_ref(each_ref_fn, void *); extern int for_each_glob_ref(each_ref_fn, const char *pattern, void *); extern int for_each_glob_ref_in(each_ref_fn, const char *pattern, const char* prefix, void *); +extern int head_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); +extern int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); +extern int for_each_ref_in_submodule(const char *submodule, const char *prefix, + each_ref_fn fn, void *cb_data); +extern int for_each_tag_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); +extern int for_each_branch_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); +extern int for_each_remote_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data); + static inline const char *has_glob_specials(const char *pattern) { return strpbrk(pattern, "?*["); @@ -754,7 +754,7 @@ int for_each_remote(each_remote_fn fn, void *priv) void ref_remove_duplicates(struct 
ref *ref_map) { - struct string_list refs = { NULL, 0, 0, 0 }; + struct string_list refs = STRING_LIST_INIT_NODUP; struct string_list_item *item = NULL; struct ref *prev = NULL, *next = NULL; for (; ref_map; prev = ref_map, ref_map = next) { @@ -1704,7 +1704,7 @@ static int get_stale_heads_cb(const char *refname, struct ref *get_stale_heads(struct remote *remote, struct ref *fetch_map) { struct ref *ref, *stale_refs = NULL; - struct string_list ref_names = { NULL, 0, 0, 0 }; + struct string_list ref_names = STRING_LIST_INIT_NODUP; struct stale_heads_info info; info.remote = remote; info.ref_names = &ref_names; @@ -319,6 +319,10 @@ static int handle_cache(const char *path, unsigned char *sha1, const char *outpu if (!mmfile[i].ptr && !mmfile[i].size) mmfile[i].ptr = xstrdup(""); } + /* + * NEEDSWORK: handle conflicts from merges with + * merge.renormalize set, too + */ ll_merge(&result, path, &mmfile[0], NULL, &mmfile[1], "ours", &mmfile[2], "theirs", 0); @@ -432,8 +436,8 @@ static int update_paths(struct string_list *update) static int do_plain_rerere(struct string_list *rr, int fd) { - struct string_list conflict = { NULL, 0, 0, 1 }; - struct string_list update = { NULL, 0, 0, 1 }; + struct string_list conflict = STRING_LIST_INIT_DUP; + struct string_list update = STRING_LIST_INIT_DUP; int i; find_conflict(&conflict); @@ -553,7 +557,7 @@ int setup_rerere(struct string_list *merge_rr, int flags) int rerere(int flags) { - struct string_list merge_rr = { NULL, 0, 0, 1 }; + struct string_list merge_rr = STRING_LIST_INIT_DUP; int fd; fd = setup_rerere(&merge_rr, flags); @@ -591,8 +595,8 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr) int rerere_forget(const char **pathspec) { int i, fd; - struct string_list conflict = { NULL, 0, 0, 1 }; - struct string_list merge_rr = { NULL, 0, 0, 1 }; + struct string_list conflict = STRING_LIST_INIT_DUP; + struct string_list merge_rr = STRING_LIST_INIT_DUP; if (read_cache() < 0) return error("Could not 
read index"); diff --git a/resolve-undo.c b/resolve-undo.c index 174ebec9e5..72b46125b7 100644 --- a/resolve-undo.c +++ b/resolve-undo.c @@ -28,29 +28,25 @@ void record_resolve_undo(struct index_state *istate, struct cache_entry *ce) ui->mode[stage - 1] = ce->ce_mode; } -static int write_one(struct string_list_item *item, void *cbdata) +void resolve_undo_write(struct strbuf *sb, struct string_list *resolve_undo) { - struct strbuf *sb = cbdata; - struct resolve_undo_info *ui = item->util; - int i; + struct string_list_item *item; + for_each_string_list_item(item, resolve_undo) { + struct resolve_undo_info *ui = item->util; + int i; - if (!ui) - return 0; - strbuf_addstr(sb, item->string); - strbuf_addch(sb, 0); - for (i = 0; i < 3; i++) - strbuf_addf(sb, "%o%c", ui->mode[i], 0); - for (i = 0; i < 3; i++) { - if (!ui->mode[i]) + if (!ui) continue; - strbuf_add(sb, ui->sha1[i], 20); + strbuf_addstr(sb, item->string); + strbuf_addch(sb, 0); + for (i = 0; i < 3; i++) + strbuf_addf(sb, "%o%c", ui->mode[i], 0); + for (i = 0; i < 3; i++) { + if (!ui->mode[i]) + continue; + strbuf_add(sb, ui->sha1[i], 20); + } } - return 0; -} - -void resolve_undo_write(struct strbuf *sb, struct string_list *resolve_undo) -{ - for_each_string_list(resolve_undo, write_one, sb); } struct string_list *resolve_undo_read(const char *data, unsigned long size) diff --git a/revision.c b/revision.c index 7e82efd932..b1c18906ba 100644 --- a/revision.c +++ b/revision.c @@ -820,12 +820,12 @@ static void init_all_refs_cb(struct all_refs_cb *cb, struct rev_info *revs, cb->all_flags = flags; } -static void handle_refs(struct rev_info *revs, unsigned flags, - int (*for_each)(each_ref_fn, void *)) +static void handle_refs(const char *submodule, struct rev_info *revs, unsigned flags, + int (*for_each)(const char *, each_ref_fn, void *)) { struct all_refs_cb cb; init_all_refs_cb(&cb, revs, flags); - for_each(handle_one_ref, &cb); + for_each(submodule, handle_one_ref, &cb); } static void 
handle_one_reflog_commit(unsigned char *sha1, void *cb_data) @@ -1148,6 +1148,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg int *unkc, const char **unkv) { const char *arg = argv[0]; + const char *optarg; + int argcount; /* pseudo revision arguments */ if (!strcmp(arg, "--all") || !strcmp(arg, "--branches") || @@ -1160,11 +1162,13 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg return 1; } - if (!prefixcmp(arg, "--max-count=")) { - revs->max_count = atoi(arg + 12); + if ((argcount = parse_long_opt("max-count", argv, &optarg))) { + revs->max_count = atoi(optarg); revs->no_walk = 0; - } else if (!prefixcmp(arg, "--skip=")) { - revs->skip_count = atoi(arg + 7); + return argcount; + } else if ((argcount = parse_long_opt("skip", argv, &optarg))) { + revs->skip_count = atoi(optarg); + return argcount; } else if ((*arg == '-') && isdigit(arg[1])) { /* accept -<digit>, like traditional "head" */ revs->max_count = atoi(arg + 1); @@ -1178,18 +1182,24 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg } else if (!prefixcmp(arg, "-n")) { revs->max_count = atoi(arg + 2); revs->no_walk = 0; - } else if (!prefixcmp(arg, "--max-age=")) { - revs->max_age = atoi(arg + 10); - } else if (!prefixcmp(arg, "--since=")) { - revs->max_age = approxidate(arg + 8); - } else if (!prefixcmp(arg, "--after=")) { - revs->max_age = approxidate(arg + 8); - } else if (!prefixcmp(arg, "--min-age=")) { - revs->min_age = atoi(arg + 10); - } else if (!prefixcmp(arg, "--before=")) { - revs->min_age = approxidate(arg + 9); - } else if (!prefixcmp(arg, "--until=")) { - revs->min_age = approxidate(arg + 8); + } else if ((argcount = parse_long_opt("max-age", argv, &optarg))) { + revs->max_age = atoi(optarg); + return argcount; + } else if ((argcount = parse_long_opt("since", argv, &optarg))) { + revs->max_age = approxidate(optarg); + return argcount; + } else if ((argcount = parse_long_opt("after", 
argv, &optarg))) { + revs->max_age = approxidate(optarg); + return argcount; + } else if ((argcount = parse_long_opt("min-age", argv, &optarg))) { + revs->min_age = atoi(optarg); + return argcount; + } else if ((argcount = parse_long_opt("before", argv, &optarg))) { + revs->min_age = approxidate(optarg); + return argcount; + } else if ((argcount = parse_long_opt("until", argv, &optarg))) { + revs->min_age = approxidate(optarg); + return argcount; } else if (!strcmp(arg, "--first-parent")) { revs->first_parent_only = 1; } else if (!strcmp(arg, "--ancestry-path")) { @@ -1295,6 +1305,10 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->pretty_given = 1; get_commit_format(arg+8, revs); } else if (!prefixcmp(arg, "--pretty=") || !prefixcmp(arg, "--format=")) { + /* + * Detached form ("--pretty X" as opposed to "--pretty=X") + * not allowed, since the argument is optional. + */ revs->verbose_header = 1; revs->pretty_given = 1; get_commit_format(arg+9, revs); @@ -1359,21 +1373,25 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg } else if (!strcmp(arg, "--relative-date")) { revs->date_mode = DATE_RELATIVE; revs->date_mode_explicit = 1; - } else if (!strncmp(arg, "--date=", 7)) { - revs->date_mode = parse_date_format(arg + 7); + } else if ((argcount = parse_long_opt("date", argv, &optarg))) { + revs->date_mode = parse_date_format(optarg); revs->date_mode_explicit = 1; + return argcount; } else if (!strcmp(arg, "--log-size")) { revs->show_log_size = 1; } /* * Grepping the commit log */ - else if (!prefixcmp(arg, "--author=")) { - add_header_grep(revs, GREP_HEADER_AUTHOR, arg+9); - } else if (!prefixcmp(arg, "--committer=")) { - add_header_grep(revs, GREP_HEADER_COMMITTER, arg+12); - } else if (!prefixcmp(arg, "--grep=")) { - add_message_grep(revs, arg+7); + else if ((argcount = parse_long_opt("author", argv, &optarg))) { + add_header_grep(revs, GREP_HEADER_AUTHOR, optarg); + return argcount; + } else 
if ((argcount = parse_long_opt("committer", argv, &optarg))) { + add_header_grep(revs, GREP_HEADER_COMMITTER, optarg); + return argcount; + } else if ((argcount = parse_long_opt("grep", argv, &optarg))) { + add_message_grep(revs, optarg); + return argcount; } else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) { revs->grep_filter.regflags |= REG_EXTENDED; } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) { @@ -1382,12 +1400,12 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->grep_filter.fixed = 1; } else if (!strcmp(arg, "--all-match")) { revs->grep_filter.all_match = 1; - } else if (!prefixcmp(arg, "--encoding=")) { - arg += 11; - if (strcmp(arg, "none")) - git_log_output_encoding = xstrdup(arg); + } else if ((argcount = parse_long_opt("encoding", argv, &optarg))) { + if (strcmp(optarg, "none")) + git_log_output_encoding = xstrdup(optarg); else git_log_output_encoding = ""; + return argcount; } else if (!strcmp(arg, "--reverse")) { revs->reverse ^= 1; } else if (!strcmp(arg, "--children")) { @@ -1417,14 +1435,14 @@ void parse_revision_opt(struct rev_info *revs, struct parse_opt_ctx_t *ctx, ctx->argc -= n; } -static int for_each_bad_bisect_ref(each_ref_fn fn, void *cb_data) +static int for_each_bad_bisect_ref(const char *submodule, each_ref_fn fn, void *cb_data) { - return for_each_ref_in("refs/bisect/bad", fn, cb_data); + return for_each_ref_in_submodule(submodule, "refs/bisect/bad", fn, cb_data); } -static int for_each_good_bisect_ref(each_ref_fn fn, void *cb_data) +static int for_each_good_bisect_ref(const char *submodule, each_ref_fn fn, void *cb_data) { - return for_each_ref_in("refs/bisect/good", fn, cb_data); + return for_each_ref_in_submodule(submodule, "refs/bisect/good", fn, cb_data); } static void append_prune_data(const char ***prune_data, const char **av) @@ -1466,6 +1484,12 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s { int i, flags, 
left, seen_dashdash, read_from_stdin, got_rev_arg = 0; const char **prune_data = NULL; + const char *submodule = NULL; + const char *optarg; + int argcount; + + if (opt) + submodule = opt->submodule; /* First, search for "--" */ seen_dashdash = 0; @@ -1490,32 +1514,33 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s int opts; if (!strcmp(arg, "--all")) { - handle_refs(revs, flags, for_each_ref); - handle_refs(revs, flags, head_ref); + handle_refs(submodule, revs, flags, for_each_ref_submodule); + handle_refs(submodule, revs, flags, head_ref_submodule); continue; } if (!strcmp(arg, "--branches")) { - handle_refs(revs, flags, for_each_branch_ref); + handle_refs(submodule, revs, flags, for_each_branch_ref_submodule); continue; } if (!strcmp(arg, "--bisect")) { - handle_refs(revs, flags, for_each_bad_bisect_ref); - handle_refs(revs, flags ^ UNINTERESTING, for_each_good_bisect_ref); + handle_refs(submodule, revs, flags, for_each_bad_bisect_ref); + handle_refs(submodule, revs, flags ^ UNINTERESTING, for_each_good_bisect_ref); revs->bisect = 1; continue; } if (!strcmp(arg, "--tags")) { - handle_refs(revs, flags, for_each_tag_ref); + handle_refs(submodule, revs, flags, for_each_tag_ref_submodule); continue; } if (!strcmp(arg, "--remotes")) { - handle_refs(revs, flags, for_each_remote_ref); + handle_refs(submodule, revs, flags, for_each_remote_ref_submodule); continue; } - if (!prefixcmp(arg, "--glob=")) { + if ((argcount = parse_long_opt("glob", argv + i, &optarg))) { struct all_refs_cb cb; + i += argcount - 1; init_all_refs_cb(&cb, revs, flags); - for_each_glob_ref(handle_one_ref, arg + 7, &cb); + for_each_glob_ref(handle_one_ref, optarg, &cb); continue; } if (!prefixcmp(arg, "--branches=")) { diff --git a/revision.h b/revision.h index 36fdf22b29..05659c64ac 100644 --- a/revision.h +++ b/revision.h @@ -151,6 +151,7 @@ extern volatile show_early_output_fn_t show_early_output; struct setup_revision_opt { const char *def; void 
(*tweak)(struct rev_info *, struct setup_revision_opt *); + const char *submodule; }; extern void init_revisions(struct rev_info *revs, const char *prefix); @@ -313,21 +313,129 @@ const char *read_gitfile_gently(const char *path) return path; } +static const char *setup_explicit_git_dir(const char *gitdirenv, + const char *work_tree_env, int *nongit_ok) +{ + static char buffer[1024 + 1]; + const char *retval; + + if (PATH_MAX - 40 < strlen(gitdirenv)) + die("'$%s' too big", GIT_DIR_ENVIRONMENT); + if (!is_git_directory(gitdirenv)) { + if (nongit_ok) { + *nongit_ok = 1; + return NULL; + } + die("Not a git repository: '%s'", gitdirenv); + } + if (!work_tree_env) { + retval = set_work_tree(gitdirenv); + /* config may override worktree */ + if (check_repository_format_gently(nongit_ok)) + return NULL; + return retval; + } + if (check_repository_format_gently(nongit_ok)) + return NULL; + retval = get_relative_cwd(buffer, sizeof(buffer) - 1, + get_git_work_tree()); + if (!retval || !*retval) + return NULL; + set_git_dir(make_absolute_path(gitdirenv)); + if (chdir(work_tree_env) < 0) + die_errno ("Could not chdir to '%s'", work_tree_env); + strcat(buffer, "/"); + return retval; +} + +static int cwd_contains_git_dir(const char **gitfile_dirp) +{ + const char *gitfile_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT); + *gitfile_dirp = gitfile_dir; + if (gitfile_dir) { + if (set_git_dir(gitfile_dir)) + die("Repository setup failed"); + return 1; + } + return is_git_directory(DEFAULT_GIT_DIR_ENVIRONMENT); +} + +static const char *setup_discovered_git_dir(const char *work_tree_env, + int offset, int len, char *cwd, int *nongit_ok) +{ + int root_len; + + inside_git_dir = 0; + if (!work_tree_env) + inside_work_tree = 1; + root_len = offset_1st_component(cwd); + git_work_tree_cfg = xstrndup(cwd, offset > root_len ? 
offset : root_len); + if (check_repository_format_gently(nongit_ok)) + return NULL; + if (offset == len) + return NULL; + + /* Make "offset" point to past the '/', and add a '/' at the end */ + offset++; + cwd[len++] = '/'; + cwd[len] = 0; + return cwd + offset; +} + +static const char *setup_bare_git_dir(const char *work_tree_env, + int offset, int len, char *cwd, int *nongit_ok) +{ + int root_len; + + inside_git_dir = 1; + if (!work_tree_env) + inside_work_tree = 0; + if (offset != len) { + if (chdir(cwd)) + die_errno("Cannot come back to cwd"); + root_len = offset_1st_component(cwd); + cwd[offset > root_len ? offset : root_len] = '\0'; + set_git_dir(cwd); + } else + set_git_dir("."); + check_repository_format_gently(nongit_ok); + return NULL; +} + +static const char *setup_nongit(const char *cwd, int *nongit_ok) +{ + if (!nongit_ok) + die("Not a git repository (or any of the parent directories): %s", DEFAULT_GIT_DIR_ENVIRONMENT); + if (chdir(cwd)) + die_errno("Cannot come back to cwd"); + *nongit_ok = 1; + return NULL; +} + +static dev_t get_device_or_die(const char *path, const char *prefix) +{ + struct stat buf; + if (stat(path, &buf)) + die_errno("failed to stat '%s%s%s'", + prefix ? prefix : "", + prefix ? "/" : "", path); + return buf.st_dev; +} + /* * We cannot decide in this function whether we are in the work tree or * not, since the config can only be read _after_ this function was called. */ -const char *setup_git_directory_gently(int *nongit_ok) +static const char *setup_git_directory_gently_1(int *nongit_ok) { const char *work_tree_env = getenv(GIT_WORK_TREE_ENVIRONMENT); const char *env_ceiling_dirs = getenv(CEILING_DIRECTORIES_ENVIRONMENT); static char cwd[PATH_MAX+1]; const char *gitdirenv; const char *gitfile_dir; - int len, offset, ceil_offset, root_len; + int len, offset, ceil_offset; dev_t current_device = 0; int one_filesystem = 1; - struct stat buf; /* * Let's assume that we are in a git repository. 
@@ -343,38 +451,8 @@ const char *setup_git_directory_gently(int *nongit_ok) * validation. */ gitdirenv = getenv(GIT_DIR_ENVIRONMENT); - if (gitdirenv) { - if (PATH_MAX - 40 < strlen(gitdirenv)) - die("'$%s' too big", GIT_DIR_ENVIRONMENT); - if (is_git_directory(gitdirenv)) { - static char buffer[1024 + 1]; - const char *retval; - - if (!work_tree_env) { - retval = set_work_tree(gitdirenv); - /* config may override worktree */ - if (check_repository_format_gently(nongit_ok)) - return NULL; - return retval; - } - if (check_repository_format_gently(nongit_ok)) - return NULL; - retval = get_relative_cwd(buffer, sizeof(buffer) - 1, - get_git_work_tree()); - if (!retval || !*retval) - return NULL; - set_git_dir(make_absolute_path(gitdirenv)); - if (chdir(work_tree_env) < 0) - die_errno ("Could not chdir to '%s'", work_tree_env); - strcat(buffer, "/"); - return retval; - } - if (nongit_ok) { - *nongit_ok = 1; - return NULL; - } - die("Not a git repository: '%s'", gitdirenv); - } + if (gitdirenv) + return setup_explicit_git_dir(gitdirenv, work_tree_env, nongit_ok); if (!getcwd(cwd, sizeof(cwd)-1)) die_errno("Unable to read current working directory"); @@ -396,49 +474,21 @@ const char *setup_git_directory_gently(int *nongit_ok) */ offset = len = strlen(cwd); one_filesystem = !git_env_bool("GIT_DISCOVERY_ACROSS_FILESYSTEM", 0); - if (one_filesystem) { - if (stat(".", &buf)) - die_errno("failed to stat '.'"); - current_device = buf.st_dev; - } + if (one_filesystem) + current_device = get_device_or_die(".", NULL); for (;;) { - gitfile_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT); - if (gitfile_dir) { - if (set_git_dir(gitfile_dir)) - die("Repository setup failed"); - break; - } - if (is_git_directory(DEFAULT_GIT_DIR_ENVIRONMENT)) - break; - if (is_git_directory(".")) { - inside_git_dir = 1; - if (!work_tree_env) - inside_work_tree = 0; - if (offset != len) { - root_len = offset_1st_component(cwd); - cwd[offset > root_len ? 
offset : root_len] = '\0'; - set_git_dir(cwd); - } else - set_git_dir("."); - check_repository_format_gently(nongit_ok); - return NULL; - } + if (cwd_contains_git_dir(&gitfile_dir)) + return setup_discovered_git_dir(work_tree_env, offset, + len, cwd, nongit_ok); + if (is_git_directory(".")) + return setup_bare_git_dir(work_tree_env, offset, + len, cwd, nongit_ok); while (--offset > ceil_offset && cwd[offset] != '/'); - if (offset <= ceil_offset) { - if (nongit_ok) { - if (chdir(cwd)) - die_errno("Cannot come back to cwd"); - *nongit_ok = 1; - return NULL; - } - die("Not a git repository (or any of the parent directories): %s", DEFAULT_GIT_DIR_ENVIRONMENT); - } + if (offset <= ceil_offset) + return setup_nongit(cwd, nongit_ok); if (one_filesystem) { - if (stat("..", &buf)) { - cwd[offset] = '\0'; - die_errno("failed to stat '%s/..'", cwd); - } - if (buf.st_dev != current_device) { + dev_t parent_device = get_device_or_die("..", cwd); + if (parent_device != current_device) { if (nongit_ok) { if (chdir(cwd)) die_errno("Cannot come back to cwd"); @@ -455,22 +505,16 @@ const char *setup_git_directory_gently(int *nongit_ok) die_errno("Cannot change to '%s/..'", cwd); } } +} - inside_git_dir = 0; - if (!work_tree_env) - inside_work_tree = 1; - root_len = offset_1st_component(cwd); - git_work_tree_cfg = xstrndup(cwd, offset > root_len ? 
offset : root_len); - if (check_repository_format_gently(nongit_ok)) - return NULL; - if (offset == len) - return NULL; +const char *setup_git_directory_gently(int *nongit_ok) +{ + const char *prefix; - /* Make "offset" point to past the '/', and add a '/' at the end */ - offset++; - cwd[len++] = '/'; - cwd[len] = 0; - return cwd + offset; + prefix = setup_git_directory_gently_1(nongit_ok); + if (startup_info) + startup_info->have_repository = !nongit_ok || !*nongit_ok; + return prefix; } int git_config_perm(const char *var, const char *value) diff --git a/sha1_file.c b/sha1_file.c index e42ef96d45..0cd9435619 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2086,6 +2086,7 @@ void *read_sha1_file_repl(const unsigned char *sha1, { const unsigned char *repl = lookup_replace_object(sha1); void *data = read_object(repl, type, size); + char *path; /* die if we replaced an object with one that does not exist */ if (!data && repl != sha1) @@ -2093,8 +2094,16 @@ void *read_sha1_file_repl(const unsigned char *sha1, sha1_to_hex(repl), sha1_to_hex(sha1)); /* legacy behavior is to die on corrupted objects */ - if (!data && (has_loose_object(repl) || has_packed_and_bad(repl))) - die("object %s is corrupted", sha1_to_hex(repl)); + if (!data) { + if (has_loose_object(repl)) { + path = sha1_file_name(sha1); + die("loose object %s (stored in %s) is corrupted", sha1_to_hex(repl), path); + } + if (has_packed_and_bad(repl)) { + path = sha1_pack_name(sha1); + die("packed object %s (stored in %s) is corrupted", sha1_to_hex(repl), path); + } + } if (replacement) *replacement = repl; diff --git a/sha1_name.c b/sha1_name.c index 4af94fa598..7b7e61719f 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -342,7 +342,7 @@ static int get_sha1_1(const char *name, int len, unsigned char *sha1); static int get_sha1_basic(const char *str, int len, unsigned char *sha1) { - static const char *warning = "warning: refname '%.*s' is ambiguous.\n"; + static const char *warn_msg = "refname '%.*s' is ambiguous."; 
char *real_ref = NULL; int refs_found = 0; int at, reflog_len; @@ -390,7 +390,7 @@ static int get_sha1_basic(const char *str, int len, unsigned char *sha1) return -1; if (warn_ambiguous_refs && refs_found > 1) - fprintf(stderr, warning, len, str); + warning(warn_msg, len, str); if (reflog_len) { int nth, i; @@ -426,14 +426,14 @@ static int get_sha1_basic(const char *str, int len, unsigned char *sha1) if (read_ref_at(real_ref, at_time, nth, sha1, NULL, &co_time, &co_tz, &co_cnt)) { if (at_time) - fprintf(stderr, - "warning: Log for '%.*s' only goes " - "back to %s.\n", len, str, + warning("Log for '%.*s' only goes " + "back to %s.", len, str, show_date(co_time, co_tz, DATE_RFC2822)); - else - fprintf(stderr, - "warning: Log for '%.*s' only has " - "%d entries.\n", len, str, co_cnt); + else { + free(real_ref); + die("Log for '%.*s' only has %d entries.", + len, str, co_cnt); + } } } @@ -47,7 +47,7 @@ struct commit_list *get_shallow_commits(struct object_array *heads, int depth, { int i = 0, cur_depth = 0; struct commit_list *result = NULL; - struct object_array stack = {0, 0, NULL}; + struct object_array stack = OBJECT_ARRAY_INIT; struct commit *commit = NULL; while (commit || i < heads->nr || stack.nr) { diff --git a/string-list.h b/string-list.h index 680d600d16..494693898b 100644 --- a/string-list.h +++ b/string-list.h @@ -12,6 +12,9 @@ struct string_list unsigned int strdup_strings:1; }; +#define STRING_LIST_INIT_NODUP { NULL, 0, 0, 0 } +#define STRING_LIST_INIT_DUP { NULL, 0, 0, 1 } + void print_string_list(const struct string_list *p, const char *text); void string_list_clear(struct string_list *list, int free_util); @@ -20,10 +23,12 @@ void string_list_clear(struct string_list *list, int free_util); typedef void (*string_list_clear_func_t)(void *p, const char *str); void string_list_clear_func(struct string_list *list, string_list_clear_func_t clearfunc); -/* Use this function to iterate over each item */ +/* Use this function or the macro below to iterate 
over each item */ typedef int (*string_list_each_func_t)(struct string_list_item *, void *); int for_each_string_list(struct string_list *list, string_list_each_func_t, void *cb_data); +#define for_each_string_list_item(item,list) \ + for (item = (list)->items; item < (list)->items + (list)->nr; ++item) /* Use these functions only on sorted lists: */ int string_list_has_string(const struct string_list *list, const char *string); diff --git a/submodule.c b/submodule.c index 61cb6e21dd..91a4758747 100644 --- a/submodule.c +++ b/submodule.c @@ -6,6 +6,11 @@ #include "revision.h" #include "run-command.h" #include "diffcore.h" +#include "refs.h" +#include "string-list.h" + +struct string_list config_name_for_path; +struct string_list config_ignore_for_name; static int add_submodule_odb(const char *path) { @@ -46,16 +51,90 @@ done: return ret; } +void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, + const char *path) +{ + struct string_list_item *path_option, *ignore_option; + path_option = unsorted_string_list_lookup(&config_name_for_path, path); + if (path_option) { + ignore_option = unsorted_string_list_lookup(&config_ignore_for_name, path_option->util); + if (ignore_option) + handle_ignore_submodules_arg(diffopt, ignore_option->util); + } +} + +static int submodule_config(const char *var, const char *value, void *cb) +{ + if (!prefixcmp(var, "submodule.")) + return parse_submodule_config_option(var, value); + return 0; +} + +void gitmodules_config(void) +{ + const char *work_tree = get_git_work_tree(); + if (work_tree) { + struct strbuf gitmodules_path = STRBUF_INIT; + strbuf_addstr(&gitmodules_path, work_tree); + strbuf_addstr(&gitmodules_path, "/.gitmodules"); + git_config_from_file(submodule_config, gitmodules_path.buf, NULL); + strbuf_release(&gitmodules_path); + } +} + +int parse_submodule_config_option(const char *var, const char *value) +{ + int len; + struct string_list_item *config; + struct strbuf submodname = STRBUF_INIT; + + var += 
10; /* Skip "submodule." */ + + len = strlen(var); + if ((len > 5) && !strcmp(var + len - 5, ".path")) { + strbuf_add(&submodname, var, len - 5); + config = unsorted_string_list_lookup(&config_name_for_path, value); + if (config) + free(config->util); + else + config = string_list_append(&config_name_for_path, xstrdup(value)); + config->util = strbuf_detach(&submodname, NULL); + strbuf_release(&submodname); + } else if ((len > 7) && !strcmp(var + len - 7, ".ignore")) { + if (strcmp(value, "untracked") && strcmp(value, "dirty") && + strcmp(value, "all") && strcmp(value, "none")) { + warning("Invalid parameter \"%s\" for config option \"submodule.%s.ignore\"", value, var); + return 0; + } + + strbuf_add(&submodname, var, len - 7); + config = unsorted_string_list_lookup(&config_ignore_for_name, submodname.buf); + if (config) + free(config->util); + else + config = string_list_append(&config_ignore_for_name, + strbuf_detach(&submodname, NULL)); + strbuf_release(&submodname); + config->util = xstrdup(value); + return 0; + } + return 0; +} + void handle_ignore_submodules_arg(struct diff_options *diffopt, const char *arg) { + DIFF_OPT_CLR(diffopt, IGNORE_SUBMODULES); + DIFF_OPT_CLR(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES); + DIFF_OPT_CLR(diffopt, IGNORE_DIRTY_SUBMODULES); + if (!strcmp(arg, "all")) DIFF_OPT_SET(diffopt, IGNORE_SUBMODULES); else if (!strcmp(arg, "untracked")) DIFF_OPT_SET(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES); else if (!strcmp(arg, "dirty")) DIFF_OPT_SET(diffopt, IGNORE_DIRTY_SUBMODULES); - else + else if (strcmp(arg, "none")) die("bad --ignore-submodules argument: %s", arg); } @@ -218,3 +297,163 @@ unsigned is_submodule_modified(const char *path, int ignore_untracked) strbuf_release(&buf); return dirty_submodule; } + +static int find_first_merges(struct object_array *result, const char *path, + struct commit *a, struct commit *b) +{ + int i, j; + struct object_array merges; + struct commit *commit; + int contains_another; + + char merged_revision[42]; 
+ const char *rev_args[] = { "rev-list", "--merges", "--ancestry-path", + "--all", merged_revision, NULL }; + struct rev_info revs; + struct setup_revision_opt rev_opts; + + memset(&merges, 0, sizeof(merges)); + memset(result, 0, sizeof(struct object_array)); + memset(&rev_opts, 0, sizeof(rev_opts)); + + /* get all revisions that merge commit a */ + snprintf(merged_revision, sizeof(merged_revision), "^%s", + sha1_to_hex(a->object.sha1)); + init_revisions(&revs, NULL); + rev_opts.submodule = path; + setup_revisions(sizeof(rev_args)/sizeof(char *)-1, rev_args, &revs, &rev_opts); + + /* save all revisions from the above list that contain b */ + if (prepare_revision_walk(&revs)) + die("revision walk setup failed"); + while ((commit = get_revision(&revs)) != NULL) { + struct object *o = &(commit->object); + if (in_merge_bases(b, &commit, 1)) + add_object_array(o, NULL, &merges); + } + + /* Now we've got all merges that contain a and b. Prune all + * merges that contain another found merge and save them in + * result. 
+ */ + for (i = 0; i < merges.nr; i++) { + struct commit *m1 = (struct commit *) merges.objects[i].item; + + contains_another = 0; + for (j = 0; j < merges.nr; j++) { + struct commit *m2 = (struct commit *) merges.objects[j].item; + if (i != j && in_merge_bases(m2, &m1, 1)) { + contains_another = 1; + break; + } + } + + if (!contains_another) + add_object_array(merges.objects[i].item, + merges.objects[i].name, result); + } + + free(merges.objects); + return result->nr; +} + +static void print_commit(struct commit *commit) +{ + struct strbuf sb = STRBUF_INIT; + struct pretty_print_context ctx = {0}; + ctx.date_mode = DATE_NORMAL; + format_commit_message(commit, " %h: %m %s", &sb, &ctx); + fprintf(stderr, "%s\n", sb.buf); + strbuf_release(&sb); +} + +#define MERGE_WARNING(path, msg) \ + warning("Failed to merge submodule %s (%s)", path, msg); + +int merge_submodule(unsigned char result[20], const char *path, + const unsigned char base[20], const unsigned char a[20], + const unsigned char b[20]) +{ + struct commit *commit_base, *commit_a, *commit_b; + int parent_count; + struct object_array merges; + + int i; + + /* store a in result in case we fail */ + hashcpy(result, a); + + /* we can not handle deletion conflicts */ + if (is_null_sha1(base)) + return 0; + if (is_null_sha1(a)) + return 0; + if (is_null_sha1(b)) + return 0; + + if (add_submodule_odb(path)) { + MERGE_WARNING(path, "not checked out"); + return 0; + } + + if (!(commit_base = lookup_commit_reference(base)) || + !(commit_a = lookup_commit_reference(a)) || + !(commit_b = lookup_commit_reference(b))) { + MERGE_WARNING(path, "commits not present"); + return 0; + } + + /* check whether both changes are forward */ + if (!in_merge_bases(commit_base, &commit_a, 1) || + !in_merge_bases(commit_base, &commit_b, 1)) { + MERGE_WARNING(path, "commits don't follow merge-base"); + return 0; + } + + /* Case #1: a is contained in b or vice versa */ + if (in_merge_bases(commit_a, &commit_b, 1)) { + hashcpy(result, b); + 
return 1; + } + if (in_merge_bases(commit_b, &commit_a, 1)) { + hashcpy(result, a); + return 1; + } + + /* + * Case #2: There are one or more merges that contain a and b in + * the submodule. If there is only one, then present it as a + * suggestion to the user, but leave it marked unmerged so the + * user needs to confirm the resolution. + */ + + /* find commit which merges them */ + parent_count = find_first_merges(&merges, path, commit_a, commit_b); + switch (parent_count) { + case 0: + MERGE_WARNING(path, "merge following commits not found"); + break; + + case 1: + MERGE_WARNING(path, "not fast-forward"); + fprintf(stderr, "Found a possible merge resolution " + "for the submodule:\n"); + print_commit((struct commit *) merges.objects[0].item); + fprintf(stderr, + "If this is correct simply add it to the index " + "for example\n" + "by using:\n\n" + " git update-index --cacheinfo 160000 %s \"%s\"\n\n" + "which will accept this suggestion.\n", + sha1_to_hex(merges.objects[0].item->sha1), path); + break; + + default: + MERGE_WARNING(path, "multiple merges found"); + for (i = 0; i < merges.nr; i++) + print_commit((struct commit *) merges.objects[i].item); + } + + free(merges.objects); + return 0; +} diff --git a/submodule.h b/submodule.h index 6fd3bb4070..386f410a66 100644 --- a/submodule.h +++ b/submodule.h @@ -3,11 +3,17 @@ struct diff_options; +void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, + const char *path); +void gitmodules_config(); +int parse_submodule_config_option(const char *var, const char *value); void handle_ignore_submodules_arg(struct diff_options *diffopt, const char *); void show_submodule_summary(FILE *f, const char *path, unsigned char one[20], unsigned char two[20], unsigned dirty_submodule, const char *del, const char *add, const char *reset); unsigned is_submodule_modified(const char *path, int ignore_untracked); +int merge_submodule(unsigned char result[20], const char *path, const unsigned char base[20], + const 
unsigned char a[20], const unsigned char b[20]); #endif diff --git a/t/.gitignore b/t/.gitignore index 7dcbb232cd..4e731dc1e3 100644 --- a/t/.gitignore +++ b/t/.gitignore @@ -1,2 +1,3 @@ /trash directory* /test-results +/.prove diff --git a/t/Makefile b/t/Makefile index cf5f9e2e1e..c7baefb7ea 100644 --- a/t/Makefile +++ b/t/Makefile @@ -8,6 +8,7 @@ #GIT_TEST_OPTS=--verbose --debug SHELL_PATH ?= $(SHELL) +PERL_PATH ?= /usr/bin/perl TAR ?= $(TAR) RM ?= rm -f @@ -28,8 +29,8 @@ pre-clean: clean: $(RM) -r 'trash directory'.* test-results - $(RM) t????/cvsroot/CVSROOT/?* $(RM) -r valgrind/bin + $(RM) .prove aggregate-results-and-cleanup: $(T) $(MAKE) aggregate-results @@ -48,4 +49,42 @@ full-svn-test: valgrind: GIT_TEST_OPTS=--valgrind $(MAKE) -.PHONY: pre-clean $(T) aggregate-results clean valgrind +# Smoke testing targets +-include ../GIT-VERSION-FILE +uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo unknown') +uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo unknown') + +test-results: + mkdir -p test-results + +test-results/git-smoke.tar.gz: test-results + $(PERL_PATH) ./harness \ + --archive="test-results/git-smoke.tar.gz" \ + $(T) + +smoke: test-results/git-smoke.tar.gz + +SMOKE_UPLOAD_FLAGS = +ifdef SMOKE_USERNAME + SMOKE_UPLOAD_FLAGS += -F username="$(SMOKE_USERNAME)" -F password="$(SMOKE_PASSWORD)" +endif +ifdef SMOKE_COMMENT + SMOKE_UPLOAD_FLAGS += -F comments="$(SMOKE_COMMENT)" +endif +ifdef SMOKE_TAGS + SMOKE_UPLOAD_FLAGS += -F tags="$(SMOKE_TAGS)" +endif + +smoke_report: smoke + curl \ + -H "Expect: " \ + -F project=Git \ + -F architecture="$(uname_M)" \ + -F platform="$(uname_S)" \ + -F revision="$(GIT_VERSION)" \ + -F report_file=@test-results/git-smoke.tar.gz \ + $(SMOKE_UPLOAD_FLAGS) \ + http://smoke.git.nix.is/app/projects/process_add_report/1 \ + | grep -v ^Redirecting + +.PHONY: pre-clean $(T) aggregate-results clean valgrind smoke smoke_report @@ -268,6 +268,18 @@ Do: git push gh && test ... + - Check the test coverage for your tests. 
See the "Test coverage" + below. + + Don't blindly follow test coverage metrics, they're a good way to + spot if you've missed something. If a new function you added + doesn't have any coverage you're probably doing something wrong, + but having 100% coverage doesn't necessarily mean that you tested + everything. + + Tests that are likely to smoke out future regressions are better + than tests that just inflate the coverage metrics. + Don't: - exit() within a <script> part. @@ -307,9 +319,21 @@ Keep in mind: Skipping tests -------------- -If you need to skip all the remaining tests you should set skip_all -and immediately call test_done. The string you give to skip_all will -be used as an explanation for why the test was skipped. for instance: +If you need to skip tests you should do so be using the three-arg form +of the test_* functions (see the "Test harness library" section +below), e.g.: + + test_expect_success PERL 'I need Perl' " + '$PERL_PATH' -e 'hlagh() if unf_unf()' + " + +The advantage of skipping tests like this is that platforms that don't +have the PERL and other optional dependencies get an indication of how +many tests they're missing. + +If the test code is too hairy for that (i.e. does a lot of setup work +outside test assertions) you can also skip all remaining tests by +setting skip_all and immediately call test_done: if ! test_have_prereq PERL then @@ -317,6 +341,9 @@ be used as an explanation for why the test was skipped. for instance: test_done fi +The string you give to skip_all will be used as an explanation for why +the test was skipped. + End with test_done ------------------ @@ -350,6 +377,12 @@ library for your script to use. test_expect_success TTY 'git --paginate rev-list uses a pager' \ ' ... ' + You can also supply a comma-separated list of prerequisites, in the + rare case where your test depends on more than one: + + test_expect_success PERL,PYTHON 'yo dawg' \ + ' test $(perl -E 'print eval "1 +" . 
qx[python -c "print 2"]') == "4" ' + - test_expect_failure [<prereq>] <message> <script> This is NOT the opposite of test_expect_success, but is used @@ -404,11 +437,12 @@ library for your script to use. - test_set_prereq SOME_PREREQ Set a test prerequisite to be used later with test_have_prereq. The - test-lib will set some prerequisites for you, e.g. PERL and PYTHON - which are derived from ./GIT-BUILD-OPTIONS (grep test_set_prereq - test-lib.sh for more). Others you can set yourself and use later - with either test_have_prereq directly, or the three argument - invocation of test_expect_success and test_expect_failure. + test-lib will set some prerequisites for you, see the + "Prerequisites" section below for a full list of these. + + Others you can set yourself and use later with either + test_have_prereq directly, or the three argument invocation of + test_expect_success and test_expect_failure. - test_have_prereq SOME PREREQ @@ -467,6 +501,13 @@ library for your script to use. <expected> file. This behaves like "cmp" but produces more helpful output when the test is run with "-v" option. + - test_path_is_file <file> [<diagnosis>] + test_path_is_dir <dir> [<diagnosis>] + test_path_is_missing <path> [<diagnosis>] + + Check whether a file/directory exists or doesn't. <diagnosis> will + be displayed if the test fails. + - test_when_finished <script> Prepend <script> to a list of commands to run to clean up @@ -481,6 +522,45 @@ library for your script to use. ... ' +Prerequisites +------------- + +These are the prerequisites that the test library predefines with +test_have_prereq. + +See the prereq argument to the test_* functions in the "Test harness +library" section above and the "test_have_prereq" function for how to +use these, and "test_set_prereq" for how to define your own. + + - PERL & PYTHON + + Git wasn't compiled with NO_PERL=YesPlease or + NO_PYTHON=YesPlease. Wrap any tests that need Perl or Python in + these. 
+ + - POSIXPERM + + The filesystem supports POSIX style permission bits. + + - BSLASHPSPEC + + Backslashes in pathspec are not directory separators. This is not + set on Windows. See 6fd1106a for details. + + - EXECKEEPSPID + + The process retains the same pid across exec(2). See fb9a2bea for + details. + + - SYMLINKS + + The filesystem we're on supports symbolic links. E.g. a FAT + filesystem doesn't support these. See 704a3143 for details. + + - SANITY + + Test is not run by root user, and an attempt to write to an + unwritable file is expected to fail correctly. Tips for Writing Tests ---------------------- @@ -508,3 +588,115 @@ the purpose of t0000-basic.sh, which is to isolate that level of validation in one place. Your test also ends up needing updating when such a change to the internal happens, so do _not_ do it and leave the low level of validation to t0000-basic.sh. + +Test coverage +------------- + +You can use the coverage tests to find code paths that are not being +used or properly exercised yet. + +To do that, run the coverage target at the top-level (not in the t/ +directory): + + make coverage + +That'll compile Git with GCC's coverage arguments, and generate a test +report with gcov after the tests finish. Running the coverage tests +can take a while, since running the tests in parallel is incompatible +with GCC's coverage mode. + +After the tests have run you can generate a list of untested +functions: + + make coverage-untested-functions + +You can also generate a detailed per-file HTML report using the +Devel::Cover module. To install it do: + + # On Debian or Ubuntu: + sudo aptitude install libdevel-cover-perl + + # From the CPAN with cpanminus + curl -L http://cpanmin.us | perl - --sudo --self-upgrade + cpanm --sudo Devel::Cover + +Then, at the top-level: + + make cover_db_html + +That'll generate a detailed cover report in the "cover_db_html" +directory, which you can then copy to a webserver, or inspect locally +in a browser. 
+ +Smoke testing +------------- + +The Git test suite has support for smoke testing. Smoke testing is +when you submit the results of a test run to a central server for +analysis and aggregation. + +Running a smoke tester is an easy and valuable way of contributing to +Git development, particularly if you have access to an uncommon OS on +obscure hardware. + +After building Git you can generate a smoke report like this in the +"t" directory: + + make clean smoke + +You can also pass arguments via the environment. This should make it +faster: + + GIT_TEST_OPTS='--root=/dev/shm' TEST_JOBS=10 make clean smoke + +The "smoke" target will run the Git test suite with Perl's +"TAP::Harness" module, and package up the results in a .tar.gz archive +with "TAP::Harness::Archive". The former is included with Perl v5.10.1 +or later, but you'll need to install the latter from the CPAN. See the +"Test coverage" section above for how you might do that. + +Once the "smoke" target finishes you'll see a message like this: + + TAP Archive created at <path to git>/t/test-results/git-smoke.tar.gz + +To upload the smoke report you need to have curl(1) installed, then +do: + + make smoke_report + +To upload the report anonymously. Hopefully that'll return something +like "Reported #7 added.". + +If you're going to be uploading reports frequently please request a +user account by E-Mailing gitsmoke@v.nix.is. 
Once you have a username +and password you'll be able to do: + + SMOKE_USERNAME=<username> SMOKE_PASSWORD=<password> make smoke_report + +You can also add an additional comment to attach to the report, and/or +a comma separated list of tags: + + SMOKE_USERNAME=<username> SMOKE_PASSWORD=<password> \ + SMOKE_COMMENT=<comment> SMOKE_TAGS=<tags> \ + make smoke_report + +Once the report is uploaded it'll be made available at +http://smoke.git.nix.is, here's an overview of Recent Smoke Reports +for Git: + + http://smoke.git.nix.is/app/projects/smoke_reports/1 + +The reports will also be mirrored to GitHub every few hours: + + http://github.com/gitsmoke/smoke-reports + +The Smolder SQLite database is also mirrored and made available for +download: + + http://github.com/gitsmoke/smoke-database + +Note that the database includes hashed (with crypt()) user passwords +and E-Mail addresses. Don't use a valuable password for the smoke +service if you have an account, or an E-Mail address you don't want to +be publicly known. The user accounts are just meant to be convenient +labels, they're not meant to be secure. diff --git a/t/harness b/t/harness new file mode 100755 index 0000000000..f5c02f49b7 --- /dev/null +++ b/t/harness @@ -0,0 +1,21 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Getopt::Long (); +use TAP::Harness::Archive; + +Getopt::Long::Parser->new( + config => [ qw/ pass_through / ], +)->getoptions( + 'jobs:1' => \(my $jobs = $ENV{TEST_JOBS}), + 'archive=s' => \my $archive, +) or die "$0: Couldn't getoptions()"; + +TAP::Harness::Archive->new({ + jobs => $jobs, + archive => $archive, + ($ENV{GIT_TEST_OPTS} + ? (test_args => [ split /\s+/, $ENV{GIT_TEST_OPTS} ]) + : ()), + extra_properties => {}, +})->runtests(@ARGV); diff --git a/t/lib-cvs.sh b/t/lib-cvs.sh index 648d1619c8..44263ade25 100644 --- a/t/lib-cvs.sh +++ b/t/lib-cvs.sh @@ -3,9 +3,6 @@ . ./test-lib.sh unset CVS_SERVER -# for clean cvsps cache -HOME=$(pwd) -export HOME if ! 
type cvs >/dev/null 2>&1 then @@ -30,6 +27,12 @@ case "$cvsps_version" in ;; esac +setup_cvs_test_repository () { + CVSROOT="$(pwd)/.cvsroot" && + cp -r "$TEST_DIRECTORY/$1/cvsroot" "$CVSROOT" && + export CVSROOT +} + test_cvs_co () { # Usage: test_cvs_co BRANCH_NAME rm -rf module-cvs-"$1" diff --git a/t/lib-patch-mode.sh b/t/lib-patch-mode.sh index 375e248651..06c3c91762 100644 --- a/t/lib-patch-mode.sh +++ b/t/lib-patch-mode.sh @@ -2,11 +2,6 @@ . ./test-lib.sh -if ! test_have_prereq PERL; then - skip_all='skipping --patch tests, perl not available' - test_done -fi - set_state () { echo "$3" > "$1" && git add "$1" && diff --git a/t/lib-prereq-FILEMODE.sh b/t/lib-prereq-FILEMODE.sh new file mode 100644 index 0000000000..bce5a4c8bd --- /dev/null +++ b/t/lib-prereq-FILEMODE.sh @@ -0,0 +1,11 @@ +#!/bin/sh +# +# Copyright (c) 2010 Ævar Arnfjörð Bjarmason +# + +if test "$(git config --bool core.filemode)" = false +then + say 'filemode disabled on the filesystem' +else + test_set_prereq FILEMODE +fi diff --git a/t/lib-rebase.sh b/t/lib-rebase.sh index 6aefe27593..6ccf797091 100644 --- a/t/lib-rebase.sh +++ b/t/lib-rebase.sh @@ -47,6 +47,8 @@ for line in $FAKE_LINES; do case $line in squash|fixup|edit|reword) action="$line";; + exec*) + echo "$line" | sed 's/_/ /g' >> "$1";; "#") echo '# comment' >> "$1";; ">") diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index f2c73369a5..f688bd3ef5 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -54,9 +54,40 @@ test_expect_success 'success is reported like this' ' test_expect_failure 'pretend we have a known breakage' ' false ' + +test_expect_success 'pretend we have fixed a known breakage (run in sub test-lib)' " + mkdir passing-todo && + (cd passing-todo && + cat >passing-todo.sh <<EOF && +#!$SHELL_PATH + +test_description='A passing TODO test + +This is run in a sub test-lib so that we do not get incorrect passing +metrics +' + +# Point to the t/test-lib.sh, which isn't in ../ as usual +TEST_DIRECTORY=\"$TEST_DIRECTORY\" 
+. \"\$TEST_DIRECTORY\"/test-lib.sh + test_expect_failure 'pretend we have fixed a known breakage' ' : ' + +test_done +EOF + chmod +x passing-todo.sh && + ./passing-todo.sh >out 2>err && + ! test -s err && +cat >expect <<EOF && +ok 1 - pretend we have fixed a known breakage # TODO known breakage +# fixed 1 known breakage(s) +# passed all 1 test(s) +1..1 +EOF + test_cmp expect out) +" test_set_prereq HAVEIT haveit=no test_expect_success HAVEIT 'test runs if prerequisite is satisfied' ' @@ -73,6 +104,27 @@ then exit 1 fi +test_set_prereq HAVETHIS +haveit=no +test_expect_success HAVETHIS,HAVEIT 'test runs if prerequisites are satisfied' ' + test_have_prereq HAVEIT && + test_have_prereq HAVETHIS && + haveit=yes +' +donthaveit=yes +test_expect_success HAVEIT,DONTHAVEIT 'unmet prerequisites causes test to be skipped' ' + donthaveit=no +' +donthaveiteither=yes +test_expect_success DONTHAVEIT,HAVEIT 'unmet prerequisites causes test to be skipped' ' + donthaveiteither=no +' +if test $haveit$donthaveit$donthaveiteither != yesyesyes +then + say "bug in test framework: multiple prerequisite tags do not work reliably" + exit 1 +fi + clean=no test_expect_success 'tests clean up after themselves' ' test_when_finished clean=yes diff --git a/t/t0001-init.sh b/t/t0001-init.sh index 7c0a698b92..7fe8883ae0 100755 --- a/t/t0001-init.sh +++ b/t/t0001-init.sh @@ -171,8 +171,6 @@ test_expect_success 'init with init.templatedir set' ' mkdir templatedir-source && echo Content >templatedir-source/file && ( - HOME="`pwd`" && - export HOME && test_config="${HOME}/.gitconfig" && git config -f "$test_config" init.templatedir "${HOME}/templatedir-source" && mkdir templatedir-set && @@ -188,8 +186,6 @@ test_expect_success 'init with init.templatedir set' ' test_expect_success 'init --bare/--shared overrides system/global config' ' ( - HOME="`pwd`" && - export HOME && test_config="$HOME"/.gitconfig && unset GIT_CONFIG_NOGLOBAL && git config -f "$test_config" core.bare false && @@ -205,8 +201,6 @@ 
test_expect_success 'init --bare/--shared overrides system/global config' ' test_expect_success 'init honors global core.sharedRepository' ' ( - HOME="`pwd`" && - export HOME && test_config="$HOME"/.gitconfig && unset GIT_CONFIG_NOGLOBAL && git config -f "$test_config" core.sharedRepository 0666 && @@ -301,7 +295,7 @@ test_expect_success 'init notices EEXIST (2)' ' ) ' -test_expect_success POSIXPERM 'init notices EPERM' ' +test_expect_success POSIXPERM,SANITY 'init notices EPERM' ' rm -fr newdir && ( mkdir newdir && diff --git a/t/t0004-unwritable.sh b/t/t0004-unwritable.sh index 2342ac5788..385b1265de 100755 --- a/t/t0004-unwritable.sh +++ b/t/t0004-unwritable.sh @@ -15,7 +15,7 @@ test_expect_success setup ' ' -test_expect_success POSIXPERM 'write-tree should notice unwritable repository' ' +test_expect_success POSIXPERM,SANITY 'write-tree should notice unwritable repository' ' ( chmod a-w .git/objects .git/objects/?? && @@ -27,7 +27,7 @@ test_expect_success POSIXPERM 'write-tree should notice unwritable repository' ' ' -test_expect_success POSIXPERM 'commit should notice unwritable repository' ' +test_expect_success POSIXPERM,SANITY 'commit should notice unwritable repository' ' ( chmod a-w .git/objects .git/objects/?? 
&& @@ -39,7 +39,7 @@ test_expect_success POSIXPERM 'commit should notice unwritable repository' ' ' -test_expect_success POSIXPERM 'update-index should notice unwritable repository' ' +test_expect_success POSIXPERM,SANITY 'update-index should notice unwritable repository' ' ( echo 6O >file && @@ -52,7 +52,7 @@ test_expect_success POSIXPERM 'update-index should notice unwritable repository' ' -test_expect_success POSIXPERM 'add should notice unwritable repository' ' +test_expect_success POSIXPERM,SANITY 'add should notice unwritable repository' ' ( echo b >file && diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh new file mode 100755 index 0000000000..d3225ada68 --- /dev/null +++ b/t/t0080-vcs-svn.sh @@ -0,0 +1,171 @@ +#!/bin/sh + +test_description='check infrastructure for svn importer' + +. ./test-lib.sh +uint32_max=4294967295 + +test_expect_success 'obj pool: store data' ' + cat <<-\EOF >expected && + 0 + 1 + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 16 + set one 13 + test one 13 + reset one + EOF + test_cmp expected actual +' + +test_expect_success 'obj pool: NULL is offset ~0' ' + echo "$uint32_max" >expected && + echo null one | test-obj-pool >actual && + test_cmp expected actual +' + +test_expect_success 'obj pool: out-of-bounds access' ' + cat <<-EOF >expected && + 0 + 0 + $uint32_max + $uint32_max + 16 + 20 + $uint32_max + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 16 + alloc two 16 + offset one 20 + offset two 20 + alloc one 5 + offset one 20 + free one 1 + offset one 20 + reset one + reset two + EOF + test_cmp expected actual +' + +test_expect_success 'obj pool: high-water mark' ' + cat <<-\EOF >expected && + 0 + 0 + 10 + 20 + 20 + 20 + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 10 + committed one + alloc one 10 + commit one + committed one + alloc one 10 + free one 20 + committed one + reset one + EOF + test_cmp expected actual +' + +test_expect_success 'line buffer' ' + echo HELLO >expected1 && + printf "%s\n" "" HELLO 
>expected2 && + echo >expected3 && + printf "%s\n" "" Q | q_to_nul >expected4 && + printf "%s\n" foo "" >expected5 && + printf "%s\n" "" foo >expected6 && + + test-line-buffer <<-\EOF >actual1 && + 5 + HELLO + EOF + + test-line-buffer <<-\EOF >actual2 && + 0 + + 5 + HELLO + EOF + + q_to_nul <<-\EOF | + 1 + Q + EOF + test-line-buffer >actual3 && + + q_to_nul <<-\EOF | + 0 + + 1 + Q + EOF + test-line-buffer >actual4 && + + test-line-buffer <<-\EOF >actual5 && + 5 + foo + EOF + + test-line-buffer <<-\EOF >actual6 && + 0 + + 5 + foo + EOF + + test_cmp expected1 actual1 && + test_cmp expected2 actual2 && + test_cmp expected3 actual3 && + test_cmp expected4 actual4 && + test_cmp expected5 actual5 && + test_cmp expected6 actual6 +' + +test_expect_success 'string pool' ' + echo a does not equal b >expected.differ && + echo a equals a >expected.match && + echo equals equals equals >expected.matchmore && + + test-string-pool "a,--b" >actual.differ && + test-string-pool "a,a" >actual.match && + test-string-pool "equals-equals" >actual.matchmore && + test_must_fail test-string-pool a,a,a && + test_must_fail test-string-pool a && + + test_cmp expected.differ actual.differ && + test_cmp expected.match actual.match && + test_cmp expected.matchmore actual.matchmore +' + +test_expect_success 'treap sort' ' + cat <<-\EOF >unsorted && + 68 + 12 + 13 + 13 + 68 + 13 + 13 + 21 + 10 + 11 + 12 + 13 + 13 + EOF + sort unsorted >expected && + + test-treap <unsorted >actual && + test_cmp expected actual +' + +test_done diff --git a/t/t1001-read-tree-m-2way.sh b/t/t1001-read-tree-m-2way.sh index 0c562bb820..93ca84f9e6 100755 --- a/t/t1001-read-tree-m-2way.sh +++ b/t/t1001-read-tree-m-2way.sh @@ -359,7 +359,7 @@ test_expect_success \ test_expect_success \ 'a/b (untracked) vs a, plus c/d case test.' \ - '! 
git read-tree -u -m "$treeH" "$treeM" && + 'test_must_fail git read-tree -u -m "$treeH" "$treeM" && git ls-files --stage && test -f a/b' diff --git a/t/t1004-read-tree-m-u-wf.sh b/t/t1004-read-tree-m-u-wf.sh index f19b4a2a4a..eb8e3d4476 100755 --- a/t/t1004-read-tree-m-u-wf.sh +++ b/t/t1004-read-tree-m-u-wf.sh @@ -177,7 +177,7 @@ test_expect_success SYMLINKS 'funny symlink in work tree' ' ' -test_expect_success SYMLINKS 'funny symlink in work tree, un-unlink-able' ' +test_expect_success SYMLINKS,SANITY 'funny symlink in work tree, un-unlink-able' ' rm -fr a b && git reset --hard && diff --git a/t/t1011-read-tree-sparse-checkout.sh b/t/t1011-read-tree-sparse-checkout.sh index 62246dbf95..9a07de1a5b 100755 --- a/t/t1011-read-tree-sparse-checkout.sh +++ b/t/t1011-read-tree-sparse-checkout.sh @@ -1,16 +1,30 @@ #!/bin/sh -test_description='sparse checkout tests' +test_description='sparse checkout tests + +* (tag: removed, master) removed +| D sub/added +* (HEAD, tag: top) modified and added +| M init.t +| A sub/added +* (tag: init) init + A init.t +' . 
./test-lib.sh -cat >expected <<EOF -100644 77f0ba1734ed79d12881f81b36ee134de6a3327b 0 init.t -100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 sub/added -EOF test_expect_success 'setup' ' + cat >expected <<-\EOF && + 100644 77f0ba1734ed79d12881f81b36ee134de6a3327b 0 init.t + 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 sub/added + EOF + cat >expected.swt <<-\EOF && + H init.t + H sub/added + EOF + test_commit init && - echo modified >> init.t && + echo modified >>init.t && mkdir sub && touch sub/added && git add init.t sub/added && @@ -20,26 +34,22 @@ test_expect_success 'setup' ' git commit -m removed && git tag removed && git checkout top && - git ls-files --stage > result && + git ls-files --stage >result && test_cmp expected result ' -cat >expected.swt <<EOF -H init.t -H sub/added -EOF test_expect_success 'read-tree without .git/info/sparse-checkout' ' git read-tree -m -u HEAD && - git ls-files --stage > result && + git ls-files --stage >result && test_cmp expected result && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result ' test_expect_success 'read-tree with .git/info/sparse-checkout but disabled' ' - echo > .git/info/sparse-checkout + echo >.git/info/sparse-checkout git read-tree -m -u HEAD && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result && test -f init.t && test -f sub/added @@ -47,9 +57,9 @@ test_expect_success 'read-tree with .git/info/sparse-checkout but disabled' ' test_expect_success 'read-tree --no-sparse-checkout with empty .git/info/sparse-checkout and enabled' ' git config core.sparsecheckout true && - echo > .git/info/sparse-checkout && + echo >.git/info/sparse-checkout && git read-tree --no-sparse-checkout -m -u HEAD && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result && test -f init.t && test -f sub/added @@ -57,94 +67,113 @@ test_expect_success 'read-tree --no-sparse-checkout with empty .git/info/sparse- 
test_expect_success 'read-tree with empty .git/info/sparse-checkout' ' git config core.sparsecheckout true && - echo > .git/info/sparse-checkout && + echo >.git/info/sparse-checkout && test_must_fail git read-tree -m -u HEAD && - git ls-files --stage > result && + git ls-files --stage >result && test_cmp expected result && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result && test -f init.t && test -f sub/added ' -cat >expected.swt <<EOF -S init.t -H sub/added -EOF test_expect_success 'match directories with trailing slash' ' + cat >expected.swt-noinit <<-\EOF && + S init.t + H sub/added + EOF + echo sub/ > .git/info/sparse-checkout && git read-tree -m -u HEAD && git ls-files -t > result && - test_cmp expected.swt result && + test_cmp expected.swt-noinit result && test ! -f init.t && test -f sub/added ' -cat >expected.swt <<EOF -H init.t -H sub/added -EOF test_expect_failure 'match directories without trailing slash' ' - echo init.t > .git/info/sparse-checkout && - echo sub >> .git/info/sparse-checkout && + echo init.t >.git/info/sparse-checkout && + echo sub >>.git/info/sparse-checkout && git read-tree -m -u HEAD && - git ls-files -t > result && + git ls-files -t >result && test_cmp expected.swt result && test ! -f init.t && test -f sub/added ' -cat >expected.swt <<EOF -H init.t -S sub/added -EOF test_expect_success 'checkout area changes' ' - echo init.t > .git/info/sparse-checkout && + cat >expected.swt-nosub <<-\EOF && + H init.t + S sub/added + EOF + + echo init.t >.git/info/sparse-checkout && git read-tree -m -u HEAD && - git ls-files -t > result && - test_cmp expected.swt result && + git ls-files -t >result && + test_cmp expected.swt-nosub result && test -f init.t && test ! -f sub/added ' test_expect_success 'read-tree updates worktree, absent case' ' - echo sub/added > .git/info/sparse-checkout && + echo sub/added >.git/info/sparse-checkout && git checkout -f top && git read-tree -m -u HEAD^ && test ! 
-f init.t ' test_expect_success 'read-tree updates worktree, dirty case' ' - echo sub/added > .git/info/sparse-checkout && + echo sub/added >.git/info/sparse-checkout && git checkout -f top && - echo dirty > init.t && + echo dirty >init.t && git read-tree -m -u HEAD^ && grep -q dirty init.t && rm init.t ' test_expect_success 'read-tree removes worktree, dirty case' ' - echo init.t > .git/info/sparse-checkout && + echo init.t >.git/info/sparse-checkout && git checkout -f top && - echo dirty > added && + echo dirty >added && git read-tree -m -u HEAD^ && grep -q dirty added ' test_expect_success 'read-tree adds to worktree, absent case' ' - echo init.t > .git/info/sparse-checkout && + echo init.t >.git/info/sparse-checkout && git checkout -f removed && git read-tree -u -m HEAD^ && test ! -f sub/added ' test_expect_success 'read-tree adds to worktree, dirty case' ' - echo init.t > .git/info/sparse-checkout && + echo init.t >.git/info/sparse-checkout && git checkout -f removed && mkdir sub && - echo dirty > sub/added && + echo dirty >sub/added && git read-tree -u -m HEAD^ && grep -q dirty sub/added ' +test_expect_success 'index removal and worktree narrowing at the same time' ' + >empty && + echo init.t >.git/info/sparse-checkout && + echo sub/added >>.git/info/sparse-checkout && + git checkout -f top && + echo init.t >.git/info/sparse-checkout && + git checkout removed && + git ls-files sub/added >result && + test ! -f sub/added && + test_cmp empty result +' + +test_expect_success 'read-tree --reset removes outside worktree' ' + >empty && + echo init.t >.git/info/sparse-checkout && + git checkout -f top && + git reset --hard removed && + git ls-files sub/added >result && + test_cmp empty result +' + test_done diff --git a/t/t1304-default-acl.sh b/t/t1304-default-acl.sh index 97ab02aceb..b5d89a2250 100755 --- a/t/t1304-default-acl.sh +++ b/t/t1304-default-acl.sh @@ -18,9 +18,11 @@ umask 077 setfacl_out="$(setfacl -m u:root:rwx . 2>&1)" setfacl_ret=$? 
-if [ $setfacl_ret != 0 ]; then - skip_all="Skipping ACL tests: unable to use setfacl (output: '$setfacl_out'; return code: '$setfacl_ret')" - test_done +if test $setfacl_ret != 0 +then + say "Unable to use setfacl (output: '$setfacl_out'; return code: '$setfacl_ret')" +else + test_set_prereq SETFACL fi check_perms_and_acl () { @@ -34,7 +36,7 @@ check_perms_and_acl () { dirs_to_set="./ .git/ .git/objects/ .git/objects/pack/" -test_expect_success 'Setup test repo' ' +test_expect_success SETFACL 'Setup test repo' ' setfacl -m d:u::rwx,d:g::---,d:o:---,d:m:rwx $dirs_to_set && setfacl -m m:rwx $dirs_to_set && setfacl -m u:root:rwx $dirs_to_set && @@ -46,12 +48,12 @@ test_expect_success 'Setup test repo' ' git commit -m "init" ' -test_expect_success 'Objects creation does not break ACLs with restrictive umask' ' +test_expect_success SETFACL 'Objects creation does not break ACLs with restrictive umask' ' # SHA1 for empty blob check_perms_and_acl .git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 ' -test_expect_success 'git gc does not break ACLs with restrictive umask' ' +test_expect_success SETFACL 'git gc does not break ACLs with restrictive umask' ' git gc && check_perms_and_acl .git/objects/pack/*.pack ' diff --git a/t/t1501-worktree.sh b/t/t1501-worktree.sh index bd8b60732b..2c8f01f668 100755 --- a/t/t1501-worktree.sh +++ b/t/t1501-worktree.sh @@ -3,183 +3,320 @@ test_description='test separate work tree' . 
./test-lib.sh -test_rev_parse() { - name=$1 - shift - - test_expect_success "$name: is-bare-repository" \ - "test '$1' = \"\$(git rev-parse --is-bare-repository)\"" - shift - [ $# -eq 0 ] && return - - test_expect_success "$name: is-inside-git-dir" \ - "test '$1' = \"\$(git rev-parse --is-inside-git-dir)\"" - shift - [ $# -eq 0 ] && return - - test_expect_success "$name: is-inside-work-tree" \ - "test '$1' = \"\$(git rev-parse --is-inside-work-tree)\"" - shift - [ $# -eq 0 ] && return - - test_expect_success "$name: prefix" \ - "test '$1' = \"\$(git rev-parse --show-prefix)\"" - shift - [ $# -eq 0 ] && return -} - -EMPTY_TREE=$(git write-tree) -mkdir -p work/sub/dir || exit 1 -mkdir -p work2 || exit 1 -mv .git repo.git || exit 1 - -say "core.worktree = relative path" -GIT_DIR=repo.git -GIT_CONFIG="$(pwd)"/$GIT_DIR/config -export GIT_DIR GIT_CONFIG -unset GIT_WORK_TREE -git config core.worktree ../work -test_rev_parse 'outside' false false false -cd work || exit 1 -GIT_DIR=../repo.git -GIT_CONFIG="$(pwd)"/$GIT_DIR/config -test_rev_parse 'inside' false false true '' -cd sub/dir || exit 1 -GIT_DIR=../../../repo.git -GIT_CONFIG="$(pwd)"/$GIT_DIR/config -test_rev_parse 'subdirectory' false false true sub/dir/ -cd ../../.. || exit 1 - -say "core.worktree = absolute path" -GIT_DIR=$(pwd)/repo.git -GIT_CONFIG=$GIT_DIR/config -git config core.worktree "$(pwd)/work" -test_rev_parse 'outside' false false false -cd work2 -test_rev_parse 'outside2' false false false -cd ../work || exit 1 -test_rev_parse 'inside' false false true '' -cd sub/dir || exit 1 -test_rev_parse 'subdirectory' false false true sub/dir/ -cd ../../.. || exit 1 - -say "GIT_WORK_TREE=relative path (override core.worktree)" -GIT_DIR=$(pwd)/repo.git -GIT_CONFIG=$GIT_DIR/config -git config core.worktree non-existent -GIT_WORK_TREE=work -export GIT_WORK_TREE -test_rev_parse 'outside' false false false -cd work2 -test_rev_parse 'outside' false false false -cd ../work || exit 1 -GIT_WORK_TREE=. 
-test_rev_parse 'inside' false false true '' -cd sub/dir || exit 1 -GIT_WORK_TREE=../.. -test_rev_parse 'subdirectory' false false true sub/dir/ -cd ../../.. || exit 1 - -mv work repo.git/work -mv work2 repo.git/work2 - -say "GIT_WORK_TREE=absolute path, work tree below git dir" -GIT_DIR=$(pwd)/repo.git -GIT_CONFIG=$GIT_DIR/config -GIT_WORK_TREE=$(pwd)/repo.git/work -test_rev_parse 'outside' false false false -cd repo.git || exit 1 -test_rev_parse 'in repo.git' false true false -cd objects || exit 1 -test_rev_parse 'in repo.git/objects' false true false -cd ../work2 || exit 1 -test_rev_parse 'in repo.git/work2' false true false -cd ../work || exit 1 -test_rev_parse 'in repo.git/work' false true true '' -cd sub/dir || exit 1 -test_rev_parse 'in repo.git/sub/dir' false true true sub/dir/ -cd ../../../.. || exit 1 - -test_expect_success 'repo finds its work tree' ' - (cd repo.git && - : > work/sub/dir/untracked && - test sub/dir/untracked = "$(git ls-files --others)") -' - -test_expect_success 'repo finds its work tree from work tree, too' ' - (cd repo.git/work/sub/dir && - : > tracked && - git --git-dir=../../.. add tracked && - cd ../../.. 
&& - test sub/dir/tracked = "$(git ls-files)") +test_expect_success 'setup' ' + EMPTY_TREE=$(git write-tree) && + EMPTY_BLOB=$(git hash-object -t blob --stdin </dev/null) && + CHANGED_BLOB=$(echo changed | git hash-object -t blob --stdin) && + ZEROES=0000000000000000000000000000000000000000 && + EMPTY_BLOB7=$(echo $EMPTY_BLOB | sed "s/\(.......\).*/\1/") && + CHANGED_BLOB7=$(echo $CHANGED_BLOB | sed "s/\(.......\).*/\1/") && + + mkdir -p work/sub/dir && + mkdir -p work2 && + mv .git repo.git +' + +test_expect_success 'setup: helper for testing rev-parse' ' + test_rev_parse() { + echo $1 >expected.bare && + echo $2 >expected.inside-git && + echo $3 >expected.inside-worktree && + if test $# -ge 4 + then + echo $4 >expected.prefix + fi && + + git rev-parse --is-bare-repository >actual.bare && + git rev-parse --is-inside-git-dir >actual.inside-git && + git rev-parse --is-inside-work-tree >actual.inside-worktree && + if test $# -ge 4 + then + git rev-parse --show-prefix >actual.prefix + fi && + + test_cmp expected.bare actual.bare && + test_cmp expected.inside-git actual.inside-git && + test_cmp expected.inside-worktree actual.inside-worktree && + if test $# -ge 4 + then + # rev-parse --show-prefix should output + # a single newline when at the top of the work tree, + # but we test for that separately. + test -z "$4" && ! 
test -s actual.prefix || + test_cmp expected.prefix actual.prefix + fi + } +' + +test_expect_success 'setup: core.worktree = relative path' ' + unset GIT_WORK_TREE; + GIT_DIR=repo.git && + GIT_CONFIG="$(pwd)"/$GIT_DIR/config && + export GIT_DIR GIT_CONFIG && + git config core.worktree ../work +' + +test_expect_success 'outside' ' + test_rev_parse false false false +' + +test_expect_success 'inside work tree' ' + ( + cd work && + GIT_DIR=../repo.git && + GIT_CONFIG="$(pwd)"/$GIT_DIR/config && + test_rev_parse false false true "" + ) +' + +test_expect_failure 'empty prefix is actually written out' ' + echo >expected && + ( + cd work && + GIT_DIR=../repo.git && + GIT_CONFIG="$(pwd)"/$GIT_DIR/config && + git rev-parse --show-prefix >../actual + ) && + test_cmp expected actual +' + +test_expect_success 'subdir of work tree' ' + ( + cd work/sub/dir && + GIT_DIR=../../../repo.git && + GIT_CONFIG="$(pwd)"/$GIT_DIR/config && + test_rev_parse false false true sub/dir/ + ) +' + +test_expect_success 'setup: core.worktree = absolute path' ' + unset GIT_WORK_TREE; + GIT_DIR=$(pwd)/repo.git && + GIT_CONFIG=$GIT_DIR/config && + export GIT_DIR GIT_CONFIG && + git config core.worktree "$(pwd)/work" +' + +test_expect_success 'outside' ' + test_rev_parse false false false && + ( + cd work2 && + test_rev_parse false false false + ) +' + +test_expect_success 'inside work tree' ' + ( + cd work && + test_rev_parse false false true "" + ) +' + +test_expect_success 'subdir of work tree' ' + ( + cd work/sub/dir && + test_rev_parse false false true sub/dir/ + ) +' + +test_expect_success 'setup: GIT_WORK_TREE=relative (override core.worktree)' ' + GIT_DIR=$(pwd)/repo.git && + GIT_CONFIG=$GIT_DIR/config && + git config core.worktree non-existent && + GIT_WORK_TREE=work && + export GIT_DIR GIT_CONFIG GIT_WORK_TREE +' + +test_expect_success 'outside' ' + test_rev_parse false false false && + ( + cd work2 && + test_rev_parse false false false + ) +' + +test_expect_success 'inside work tree' ' + ( 
+ cd work && + GIT_WORK_TREE=. && + test_rev_parse false false true "" + ) +' + +test_expect_success 'subdir of work tree' ' + ( + cd work/sub/dir && + GIT_WORK_TREE=../.. && + test_rev_parse false false true sub/dir/ + ) +' + +test_expect_success 'setup: GIT_WORK_TREE=absolute, below git dir' ' + mv work repo.git/work && + mv work2 repo.git/work2 && + GIT_DIR=$(pwd)/repo.git && + GIT_CONFIG=$GIT_DIR/config && + GIT_WORK_TREE=$(pwd)/repo.git/work && + export GIT_DIR GIT_CONFIG GIT_WORK_TREE +' + +test_expect_success 'outside' ' + echo outside && + test_rev_parse false false false +' + +test_expect_success 'in repo.git' ' + ( + cd repo.git && + test_rev_parse false true false + ) && + ( + cd repo.git/objects && + test_rev_parse false true false + ) && + ( + cd repo.git/work2 && + test_rev_parse false true false + ) +' + +test_expect_success 'inside work tree' ' + ( + cd repo.git/work && + test_rev_parse false true true "" + ) +' + +test_expect_success 'subdir of work tree' ' + ( + cd repo.git/work/sub/dir && + test_rev_parse false true true sub/dir/ + ) +' + +test_expect_success 'find work tree from repo' ' + echo sub/dir/untracked >expected && + cat <<-\EOF >repo.git/work/.gitignore && + expected.* + actual.* + .gitignore + EOF + >repo.git/work/sub/dir/untracked && + ( + cd repo.git && + git ls-files --others --exclude-standard >../actual + ) && + test_cmp expected actual +' + +test_expect_success 'find work tree from work tree' ' + echo sub/dir/tracked >expected && + >repo.git/work/sub/dir/tracked && + ( + cd repo.git/work/sub/dir && + git --git-dir=../../.. add tracked + ) && + ( + cd repo.git && + git ls-files >../actual + ) && + test_cmp expected actual ' test_expect_success '_gently() groks relative GIT_DIR & GIT_WORK_TREE' ' - (cd repo.git/work/sub/dir && - GIT_DIR=../../.. GIT_WORK_TREE=../.. GIT_PAGER= \ + ( + cd repo.git/work/sub/dir && + GIT_DIR=../../.. && + GIT_WORK_TREE=../.. 
&& + GIT_PAGER= && + export GIT_DIR GIT_WORK_TREE GIT_PAGER && + git diff --exit-code tracked && - echo changed > tracked && - ! GIT_DIR=../../.. GIT_WORK_TREE=../.. GIT_PAGER= \ - git diff --exit-code tracked) -' -cat > diff-index-cached.expected <<\EOF -:000000 100644 0000000000000000000000000000000000000000 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 A sub/dir/tracked -EOF -cat > diff-index.expected <<\EOF -:000000 100644 0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 A sub/dir/tracked -EOF - - -test_expect_success 'git diff-index' ' - GIT_DIR=repo.git GIT_WORK_TREE=repo.git/work git diff-index $EMPTY_TREE > result && - test_cmp diff-index.expected result && - GIT_DIR=repo.git git diff-index --cached $EMPTY_TREE > result && - test_cmp diff-index-cached.expected result -' -cat >diff-files.expected <<\EOF -:100644 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0000000000000000000000000000000000000000 M sub/dir/tracked -EOF - -test_expect_success 'git diff-files' ' - GIT_DIR=repo.git GIT_WORK_TREE=repo.git/work git diff-files > result && - test_cmp diff-files.expected result -' - -cat >diff-TREE.expected <<\EOF -diff --git a/sub/dir/tracked b/sub/dir/tracked -new file mode 100644 -index 0000000..5ea2ed4 ---- /dev/null -+++ b/sub/dir/tracked -@@ -0,0 +1 @@ -+changed -EOF -cat >diff-TREE-cached.expected <<\EOF -diff --git a/sub/dir/tracked b/sub/dir/tracked -new file mode 100644 -index 0000000..e69de29 -EOF -cat >diff-FILES.expected <<\EOF -diff --git a/sub/dir/tracked b/sub/dir/tracked -index e69de29..5ea2ed4 100644 ---- a/sub/dir/tracked -+++ b/sub/dir/tracked -@@ -0,0 +1 @@ -+changed -EOF - -test_expect_success 'git diff' ' - GIT_DIR=repo.git GIT_WORK_TREE=repo.git/work git diff $EMPTY_TREE > result && - test_cmp diff-TREE.expected result && - GIT_DIR=repo.git git diff --cached $EMPTY_TREE > result && - test_cmp diff-TREE-cached.expected result && - GIT_DIR=repo.git GIT_WORK_TREE=repo.git/work git diff > result && - test_cmp 
diff-FILES.expected result + echo changed >tracked && + test_must_fail git diff --exit-code tracked + ) +' + +test_expect_success 'diff-index respects work tree under .git dir' ' + cat >diff-index-cached.expected <<-EOF && + :000000 100644 $ZEROES $EMPTY_BLOB A sub/dir/tracked + EOF + cat >diff-index.expected <<-EOF && + :000000 100644 $ZEROES $ZEROES A sub/dir/tracked + EOF + + ( + GIT_DIR=repo.git && + GIT_WORK_TREE=repo.git/work && + export GIT_DIR GIT_WORK_TREE && + git diff-index $EMPTY_TREE >diff-index.actual && + git diff-index --cached $EMPTY_TREE >diff-index-cached.actual + ) && + test_cmp diff-index.expected diff-index.actual && + test_cmp diff-index-cached.expected diff-index-cached.actual +' + +test_expect_success 'diff-files respects work tree under .git dir' ' + cat >diff-files.expected <<-EOF && + :100644 100644 $EMPTY_BLOB $ZEROES M sub/dir/tracked + EOF + + ( + GIT_DIR=repo.git && + GIT_WORK_TREE=repo.git/work && + export GIT_DIR GIT_WORK_TREE && + git diff-files >diff-files.actual + ) && + test_cmp diff-files.expected diff-files.actual +' + +test_expect_success 'git diff respects work tree under .git dir' ' + cat >diff-TREE.expected <<-EOF && + diff --git a/sub/dir/tracked b/sub/dir/tracked + new file mode 100644 + index 0000000..$CHANGED_BLOB7 + --- /dev/null + +++ b/sub/dir/tracked + @@ -0,0 +1 @@ + +changed + EOF + cat >diff-TREE-cached.expected <<-EOF && + diff --git a/sub/dir/tracked b/sub/dir/tracked + new file mode 100644 + index 0000000..$EMPTY_BLOB7 + EOF + cat >diff-FILES.expected <<-EOF && + diff --git a/sub/dir/tracked b/sub/dir/tracked + index $EMPTY_BLOB7..$CHANGED_BLOB7 100644 + --- a/sub/dir/tracked + +++ b/sub/dir/tracked + @@ -0,0 +1 @@ + +changed + EOF + + ( + GIT_DIR=repo.git && + GIT_WORK_TREE=repo.git/work && + export GIT_DIR GIT_WORK_TREE && + git diff $EMPTY_TREE >diff-TREE.actual && + git diff --cached $EMPTY_TREE >diff-TREE-cached.actual && + git diff >diff-FILES.actual + ) && + test_cmp diff-TREE.expected 
diff-TREE.actual && + test_cmp diff-TREE-cached.expected diff-TREE-cached.actual && + test_cmp diff-FILES.expected diff-FILES.actual ' test_expect_success 'git grep' ' - (cd repo.git/work/sub && - GIT_DIR=../.. GIT_WORK_TREE=.. git grep -l changed | grep dir/tracked) + echo dir/tracked >expected.grep && + ( + cd repo.git/work/sub && + GIT_DIR=../.. && + GIT_WORK_TREE=.. && + export GIT_DIR GIT_WORK_TREE && + git grep -l changed >../../../actual.grep + ) && + test_cmp expected.grep actual.grep ' test_expect_success 'git commit' ' @@ -191,14 +328,14 @@ test_expect_success 'git commit' ' test_expect_success 'absolute pathspec should fail gracefully' ' ( - cd repo.git || exit 1 - git config --unset core.worktree + cd repo.git && + test_might_fail git config --unset core.worktree && test_must_fail git log HEAD -- /home ) ' test_expect_success 'make_relative_path handles double slashes in GIT_DIR' ' - : > dummy_file + >dummy_file echo git --git-dir="$(pwd)//repo.git" --work-tree="$(pwd)" add dummy_file && git --git-dir="$(pwd)//repo.git" --work-tree="$(pwd)" add dummy_file ' diff --git a/t/t1503-rev-parse-verify.sh b/t/t1503-rev-parse-verify.sh index cc65394947..100f857b16 100755 --- a/t/t1503-rev-parse-verify.sh +++ b/t/t1503-rev-parse-verify.sh @@ -104,4 +104,15 @@ test_expect_success 'use --default' ' test_must_fail git rev-parse --verify --default bar ' +test_expect_success 'master@{n} for various n' ' + N=$(git reflog | wc -l) && + Nm1=$((N-1)) && + Np1=$((N+1)) && + git rev-parse --verify master@{0} && + git rev-parse --verify master@{1} && + git rev-parse --verify master@{$Nm1} && + test_must_fail git rev-parse --verify master@{$N} && + test_must_fail git rev-parse --verify master@{$Np1} +' + test_done diff --git a/t/t1506-rev-parse-diagnosis.sh b/t/t1506-rev-parse-diagnosis.sh index af721f9719..0eeeb0e450 100755 --- a/t/t1506-rev-parse-diagnosis.sh +++ b/t/t1506-rev-parse-diagnosis.sh @@ -66,4 +66,13 @@ test_expect_success 'incorrect file in :path and :N:path' ' 
grep "fatal: Path '"'"'disk-only.txt'"'"' exists on disk, but not in the index." error ' +test_expect_success 'invalid @{n} reference' ' + test_must_fail git rev-parse master@{99999} >output 2>error && + test -z "$(cat output)" && + grep "fatal: Log for [^ ]* only has [0-9][0-9]* entries." error && + test_must_fail git rev-parse --verify master@{99999} >output 2>error && + test -z "$(cat output)" && + grep "fatal: Log for [^ ]* only has [0-9][0-9]* entries." error +' + test_done diff --git a/t/t2007-checkout-symlink.sh b/t/t2007-checkout-symlink.sh index 05cc8fdd01..a74ee227b8 100755 --- a/t/t2007-checkout-symlink.sh +++ b/t/t2007-checkout-symlink.sh @@ -6,13 +6,7 @@ test_description='git checkout to switch between branches with symlink<->dir' . ./test-lib.sh -if ! test_have_prereq SYMLINKS -then - skip_all="symbolic links not supported - skipping tests" - test_done -fi - -test_expect_success setup ' +test_expect_success SYMLINKS setup ' mkdir frotz && echo hello >frotz/filfre && @@ -38,18 +32,18 @@ test_expect_success setup ' ' -test_expect_success 'switch from symlink to dir' ' +test_expect_success SYMLINKS 'switch from symlink to dir' ' git checkout master ' -test_expect_success 'Remove temporary directories & switch to master' ' +test_expect_success SYMLINKS 'Remove temporary directories & switch to master' ' rm -fr frotz xyzzy nitfol && git checkout -f master ' -test_expect_success 'switch from dir to symlink' ' +test_expect_success SYMLINKS 'switch from dir to symlink' ' git checkout side diff --git a/t/t2013-checkout-submodule.sh b/t/t2013-checkout-submodule.sh index fda3f0af7e..70edbb33e2 100755 --- a/t/t2013-checkout-submodule.sh +++ b/t/t2013-checkout-submodule.sh @@ -39,4 +39,27 @@ test_expect_success '"checkout <submodule>" updates the index only' ' git diff-files --quiet ' +test_expect_success '"checkout <submodule>" honors diff.ignoreSubmodules' ' + git config diff.ignoreSubmodules dirty && + echo x> submodule/untracked && + git checkout HEAD >actual 
2>&1 && + ! test -s actual +' + +test_expect_success '"checkout <submodule>" honors submodule.*.ignore from .gitmodules' ' + git config diff.ignoreSubmodules none && + git config -f .gitmodules submodule.submodule.path submodule && + git config -f .gitmodules submodule.submodule.ignore untracked && + git checkout HEAD >actual 2>&1 && + ! test -s actual +' + +test_expect_success '"checkout <submodule>" honors submodule.*.ignore from .git/config' ' + git config -f .gitmodules submodule.submodule.ignore none && + git config submodule.submodule.path submodule && + git config submodule.submodule.ignore all && + git checkout HEAD >actual 2>&1 && + ! test -s actual +' + test_done diff --git a/t/t2016-checkout-patch.sh b/t/t2016-checkout-patch.sh index 2144184d79..7657ec190c 100755 --- a/t/t2016-checkout-patch.sh +++ b/t/t2016-checkout-patch.sh @@ -4,7 +4,7 @@ test_description='git checkout --patch' . ./lib-patch-mode.sh -test_expect_success 'setup' ' +test_expect_success PERL 'setup' ' mkdir dir && echo parent > dir/foo && echo dummy > bar && @@ -18,40 +18,40 @@ test_expect_success 'setup' ' # note: bar sorts before dir/foo, so the first 'n' is always to skip 'bar' -test_expect_success 'saying "n" does nothing' ' +test_expect_success PERL 'saying "n" does nothing' ' set_and_save_state dir/foo work head && (echo n; echo n) | git checkout -p && verify_saved_state bar && verify_saved_state dir/foo ' -test_expect_success 'git checkout -p' ' +test_expect_success PERL 'git checkout -p' ' (echo n; echo y) | git checkout -p && verify_saved_state bar && verify_state dir/foo head head ' -test_expect_success 'git checkout -p with staged changes' ' +test_expect_success PERL 'git checkout -p with staged changes' ' set_state dir/foo work index (echo n; echo y) | git checkout -p && verify_saved_state bar && verify_state dir/foo index index ' -test_expect_success 'git checkout -p HEAD with NO staged changes: abort' ' +test_expect_success PERL 'git checkout -p HEAD with NO staged changes: 
abort' ' set_and_save_state dir/foo work head && (echo n; echo y; echo n) | git checkout -p HEAD && verify_saved_state bar && verify_saved_state dir/foo ' -test_expect_success 'git checkout -p HEAD with NO staged changes: apply' ' +test_expect_success PERL 'git checkout -p HEAD with NO staged changes: apply' ' (echo n; echo y; echo y) | git checkout -p HEAD && verify_saved_state bar && verify_state dir/foo head head ' -test_expect_success 'git checkout -p HEAD with change already staged' ' +test_expect_success PERL 'git checkout -p HEAD with change already staged' ' set_state dir/foo index index # the third n is to get out in case it mistakenly does not apply (echo n; echo y; echo n) | git checkout -p HEAD && @@ -59,14 +59,14 @@ test_expect_success 'git checkout -p HEAD with change already staged' ' verify_state dir/foo head head ' -test_expect_success 'git checkout -p HEAD^' ' +test_expect_success PERL 'git checkout -p HEAD^' ' # the third n is to get out in case it mistakenly does not apply (echo n; echo y; echo n) | git checkout -p HEAD^ && verify_saved_state bar && verify_state dir/foo parent parent ' -test_expect_success 'git checkout -p handles deletion' ' +test_expect_success PERL 'git checkout -p handles deletion' ' set_state dir/foo work index && rm dir/foo && (echo n; echo y) | git checkout -p && @@ -79,28 +79,28 @@ test_expect_success 'git checkout -p handles deletion' ' # dir/foo. There's always an extra 'n' to reject edits to dir/foo in # the failure case (and thus get out of the loop). 
-test_expect_success 'path limiting works: dir' ' +test_expect_success PERL 'path limiting works: dir' ' set_state dir/foo work head && (echo y; echo n) | git checkout -p dir && verify_saved_state bar && verify_state dir/foo head head ' -test_expect_success 'path limiting works: -- dir' ' +test_expect_success PERL 'path limiting works: -- dir' ' set_state dir/foo work head && (echo y; echo n) | git checkout -p -- dir && verify_saved_state bar && verify_state dir/foo head head ' -test_expect_success 'path limiting works: HEAD^ -- dir' ' +test_expect_success PERL 'path limiting works: HEAD^ -- dir' ' # the third n is to get out in case it mistakenly does not apply (echo y; echo n; echo n) | git checkout -p HEAD^ -- dir && verify_saved_state bar && verify_state dir/foo parent parent ' -test_expect_success 'path limiting works: foo inside dir' ' +test_expect_success PERL 'path limiting works: foo inside dir' ' set_state dir/foo work head && # the third n is to get out in case it mistakenly does not apply (echo y; echo n; echo n) | (cd dir && git checkout -p foo) && @@ -108,7 +108,7 @@ test_expect_success 'path limiting works: foo inside dir' ' verify_state dir/foo head head ' -test_expect_success 'none of this moved HEAD' ' +test_expect_success PERL 'none of this moved HEAD' ' verify_saved_head ' diff --git a/t/t2018-checkout-branch.sh b/t/t2018-checkout-branch.sh new file mode 100755 index 0000000000..fa69016381 --- /dev/null +++ b/t/t2018-checkout-branch.sh @@ -0,0 +1,172 @@ +#!/bin/sh + +test_description='checkout ' + +. ./test-lib.sh + +# Arguments: <branch> <sha> [<checkout options>] +# +# Runs "git checkout" to switch to <branch>, testing that +# +# 1) we are on the specified branch, <branch>; +# 2) HEAD is <sha>; if <sha> is not specified, the old HEAD is used. +# +# If <checkout options> is not specified, "git checkout" is run with -b. +do_checkout() { + exp_branch=$1 && + exp_ref="refs/heads/$exp_branch" && + + # if <sha> is not specified, use HEAD. 
+ exp_sha=${2:-$(git rev-parse --verify HEAD)} && + + # default options for git checkout: -b + if [ -z "$3" ]; then + opts="-b" + else + opts="$3" + fi + + git checkout $opts $exp_branch $exp_sha && + + test $exp_ref = $(git rev-parse --symbolic-full-name HEAD) && + test $exp_sha = $(git rev-parse --verify HEAD) +} + +test_dirty_unmergeable() { + ! git diff --exit-code >/dev/null +} + +setup_dirty_unmergeable() { + echo >>file1 change2 +} + +test_dirty_mergeable() { + ! git diff --cached --exit-code >/dev/null +} + +setup_dirty_mergeable() { + echo >file2 file2 && + git add file2 +} + +test_expect_success 'setup' ' + test_commit initial file1 && + HEAD1=$(git rev-parse --verify HEAD) && + + test_commit change1 file1 && + HEAD2=$(git rev-parse --verify HEAD) && + + git branch -m branch1 +' + +test_expect_success 'checkout -b to a new branch, set to HEAD' ' + do_checkout branch2 +' + +test_expect_success 'checkout -b to a new branch, set to an explicit ref' ' + git checkout branch1 && + git branch -D branch2 && + + do_checkout branch2 $HEAD1 +' + +test_expect_success 'checkout -b to a new branch with unmergeable changes fails' ' + git checkout branch1 && + + # clean up from previous test + git branch -D branch2 && + + setup_dirty_unmergeable && + test_must_fail do_checkout branch2 $HEAD1 && + test_dirty_unmergeable +' + +test_expect_success 'checkout -f -b to a new branch with unmergeable changes discards changes' ' + # still dirty and on branch1 + do_checkout branch2 $HEAD1 "-f -b" && + test_must_fail test_dirty_unmergeable +' + +test_expect_success 'checkout -b to a new branch preserves mergeable changes' ' + git checkout branch1 && + + # clean up from previous test + git branch -D branch2 && + + setup_dirty_mergeable && + do_checkout branch2 $HEAD1 && + test_dirty_mergeable +' + +test_expect_success 'checkout -f -b to a new branch with mergeable changes discards changes' ' + # clean up from previous test + git reset --hard && + + git checkout branch1 && + + # 
clean up from previous test + git branch -D branch2 && + + setup_dirty_mergeable && + do_checkout branch2 $HEAD1 "-f -b" && + test_must_fail test_dirty_mergeable +' + +test_expect_success 'checkout -b to an existing branch fails' ' + git reset --hard HEAD && + + test_must_fail do_checkout branch2 $HEAD2 +' + +test_expect_success 'checkout -B to an existing branch resets branch to HEAD' ' + git checkout branch1 && + + do_checkout branch2 "" -B +' + +test_expect_success 'checkout -B to an existing branch from detached HEAD resets branch to HEAD' ' + git checkout $(git rev-parse --verify HEAD) && + + do_checkout branch2 "" -B +' + +test_expect_success 'checkout -B to an existing branch with an explicit ref resets branch to that ref' ' + git checkout branch1 && + + do_checkout branch2 $HEAD1 -B +' + +test_expect_success 'checkout -B to an existing branch with unmergeable changes fails' ' + git checkout branch1 && + + setup_dirty_unmergeable && + test_must_fail do_checkout branch2 $HEAD1 -B && + test_dirty_unmergeable +' + +test_expect_success 'checkout -f -B to an existing branch with unmergeable changes discards changes' ' + # still dirty and on branch1 + do_checkout branch2 $HEAD1 "-f -B" && + test_must_fail test_dirty_unmergeable +' + +test_expect_success 'checkout -B to an existing branch preserves mergeable changes' ' + git checkout branch1 && + + setup_dirty_mergeable && + do_checkout branch2 $HEAD1 -B && + test_dirty_mergeable +' + +test_expect_success 'checkout -f -B to an existing branch with mergeable changes discards changes' ' + # clean up from previous test + git reset --hard && + + git checkout branch1 && + + setup_dirty_mergeable && + do_checkout branch2 $HEAD1 "-f -B" && + test_must_fail test_dirty_mergeable +' + +test_done diff --git a/t/t3030-merge-recursive.sh b/t/t3030-merge-recursive.sh index d541544537..efe2900a37 100755 --- a/t/t3030-merge-recursive.sh +++ b/t/t3030-merge-recursive.sh @@ -294,7 +294,7 @@ test_expect_success 'fail if the index has 
unresolved entries' ' grep "You have not concluded your merge" out && rm -f .git/MERGE_HEAD && test_must_fail git merge "$c5" 2> out && - grep "Your local changes to .* would be overwritten by merge." out + grep "Your local changes to the following files would be overwritten by merge:" out ' test_expect_success 'merge-recursive remove conflict' ' diff --git a/t/t3300-funny-names.sh b/t/t3300-funny-names.sh index a99e4d8b92..f39a261d80 100755 --- a/t/t3300-funny-names.sh +++ b/t/t3300-funny-names.sh @@ -24,19 +24,25 @@ EOF cat 2>/dev/null >"$p1" "$p0" echo 'Foo Bar Baz' >"$p2" -test -f "$p1" && cmp "$p0" "$p1" || { +if test -f "$p1" && cmp "$p0" "$p1" +then + test_set_prereq TABS_IN_FILENAMES +else # since FAT/NTFS does not allow tabs in filenames, skip this test - skip_all='Your filesystem does not allow tabs in filenames, test skipped.' - test_done -} + say 'Your filesystem does not allow tabs in filenames' +fi +test_expect_success TABS_IN_FILENAMES 'setup expect' " echo 'just space no-funny' >expected -test_expect_success 'git ls-files no-funny' \ +" + +test_expect_success TABS_IN_FILENAMES 'git ls-files no-funny' \ 'git update-index --add "$p0" "$p2" && git ls-files >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 'setup expect' ' t0=`git write-tree` echo "$t0" >t0 @@ -45,18 +51,24 @@ just space no-funny "tabs\t,\" (dq) and spaces" EOF -test_expect_success 'git ls-files with-funny' \ +' + +test_expect_success TABS_IN_FILENAMES 'git ls-files with-funny' \ 'git update-index --add "$p1" && git ls-files >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 'setup expect' " echo 'just space no-funny -tabs ," (dq) and spaces' >expected -test_expect_success 'git ls-files -z with-funny' \ +tabs ,\" (dq) and spaces' >expected +" + +test_expect_success TABS_IN_FILENAMES 'git ls-files -z with-funny' \ 'git ls-files -z | perl -pe y/\\000/\\012/ >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 
'setup expect' ' t1=`git write-tree` echo "$t1" >t1 @@ -65,60 +77,78 @@ just space no-funny "tabs\t,\" (dq) and spaces" EOF -test_expect_success 'git ls-tree with funny' \ +' + +test_expect_success TABS_IN_FILENAMES 'git ls-tree with funny' \ 'git ls-tree -r $t1 | sed -e "s/^[^ ]* //" >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 'setup expect' ' cat > expected <<\EOF A "tabs\t,\" (dq) and spaces" EOF -test_expect_success 'git diff-index with-funny' \ +' + +test_expect_success TABS_IN_FILENAMES 'git diff-index with-funny' \ 'git diff-index --name-status $t0 >current && test_cmp expected current' -test_expect_success 'git diff-tree with-funny' \ +test_expect_success TABS_IN_FILENAMES 'git diff-tree with-funny' \ 'git diff-tree --name-status $t0 $t1 >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 'setup expect' " echo 'A -tabs ," (dq) and spaces' >expected -test_expect_success 'git diff-index -z with-funny' \ +tabs ,\" (dq) and spaces' >expected +" + +test_expect_success TABS_IN_FILENAMES 'git diff-index -z with-funny' \ 'git diff-index -z --name-status $t0 | perl -pe y/\\000/\\012/ >current && test_cmp expected current' -test_expect_success 'git diff-tree -z with-funny' \ +test_expect_success TABS_IN_FILENAMES 'git diff-tree -z with-funny' \ 'git diff-tree -z --name-status $t0 $t1 | perl -pe y/\\000/\\012/ >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 'setup expect' ' cat > expected <<\EOF CNUM no-funny "tabs\t,\" (dq) and spaces" EOF -test_expect_success 'git diff-tree -C with-funny' \ +' + +test_expect_success TABS_IN_FILENAMES 'git diff-tree -C with-funny' \ 'git diff-tree -C --find-copies-harder --name-status \ $t0 $t1 | sed -e 's/^C[0-9]*/CNUM/' >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 'setup expect' ' cat > expected <<\EOF RNUM no-funny "tabs\t,\" (dq) and spaces" EOF -test_expect_success 'git diff-tree delete with-funny' \ +' + 
+test_expect_success TABS_IN_FILENAMES 'git diff-tree delete with-funny' \ 'git update-index --force-remove "$p0" && git diff-index -M --name-status \ $t0 | sed -e 's/^R[0-9]*/RNUM/' >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 'setup expect' ' cat > expected <<\EOF diff --git a/no-funny "b/tabs\t,\" (dq) and spaces" similarity index NUM% rename from no-funny rename to "tabs\t,\" (dq) and spaces" EOF -test_expect_success 'git diff-tree delete with-funny' \ +' + +test_expect_success TABS_IN_FILENAMES 'git diff-tree delete with-funny' \ 'git diff-index -M -p $t0 | sed -e "s/index [0-9]*%/index NUM%/" >current && test_cmp expected current' -chmod +x "$p1" +test_expect_success TABS_IN_FILENAMES 'setup expect' ' +chmod +x "$p1" && cat > expected <<\EOF diff --git a/no-funny "b/tabs\t,\" (dq) and spaces" old mode 100644 @@ -127,31 +157,39 @@ similarity index NUM% rename from no-funny rename to "tabs\t,\" (dq) and spaces" EOF -test_expect_success 'git diff-tree delete with-funny' \ +' + +test_expect_success TABS_IN_FILENAMES 'git diff-tree delete with-funny' \ 'git diff-index -M -p $t0 | sed -e "s/index [0-9]*%/index NUM%/" >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 'setup expect' ' cat >expected <<\EOF "tabs\t,\" (dq) and spaces" 1 files changed, 0 insertions(+), 0 deletions(-) EOF -test_expect_success 'git diff-tree rename with-funny applied' \ +' + +test_expect_success TABS_IN_FILENAMES 'git diff-tree rename with-funny applied' \ 'git diff-index -M -p $t0 | git apply --stat | sed -e "s/|.*//" -e "s/ *\$//" >current && test_cmp expected current' +test_expect_success TABS_IN_FILENAMES 'setup expect' ' cat > expected <<\EOF no-funny "tabs\t,\" (dq) and spaces" 2 files changed, 3 insertions(+), 3 deletions(-) EOF -test_expect_success 'git diff-tree delete with-funny applied' \ +' + +test_expect_success TABS_IN_FILENAMES 'git diff-tree delete with-funny applied' \ 'git diff-index -p $t0 | git apply --stat | 
sed -e "s/|.*//" -e "s/ *\$//" >current && test_cmp expected current' -test_expect_success 'git apply non-git diff' \ +test_expect_success TABS_IN_FILENAMES 'git apply non-git diff' \ 'git diff-index -p $t0 | sed -ne "/^[-+@]/p" | git apply --stat | sed -e "s/|.*//" -e "s/ *\$//" >current && diff --git a/t/t3301-notes.sh b/t/t3301-notes.sh index 1d82f79ee0..96b75813d7 100755 --- a/t/t3301-notes.sh +++ b/t/t3301-notes.sh @@ -299,7 +299,7 @@ cat expect-F >> expect-rm-F test_expect_success 'verify note removal with -F /dev/null' ' git log -4 > output && test_cmp expect-rm-F output && - ! git notes show + test_must_fail git notes show ' test_expect_success 'do not create empty note with -m "" (setup)' ' @@ -309,7 +309,7 @@ test_expect_success 'do not create empty note with -m "" (setup)' ' test_expect_success 'verify non-creation of note with -m ""' ' git log -4 > output && test_cmp expect-rm-F output && - ! git notes show + test_must_fail git notes show ' cat > expect-combine_m_and_F << EOF @@ -357,7 +357,7 @@ cat expect-multiline >> expect-rm-remove test_expect_success 'verify note removal with "git notes remove"' ' git log -4 > output && test_cmp expect-rm-remove output && - ! git notes show HEAD^ + test_must_fail git notes show HEAD^ ' cat > expect << EOF diff --git a/t/t3302-notes-index-expensive.sh b/t/t3302-notes-index-expensive.sh index 8ab333dbd9..e35d7811ac 100755 --- a/t/t3302-notes-index-expensive.sh +++ b/t/t3302-notes-index-expensive.sh @@ -7,11 +7,9 @@ test_description='Test commit notes index (expensive!)' . 
./test-lib.sh -test -z "$GIT_NOTES_TIMING_TESTS" && { - skip_all="Skipping timing tests" - test_done - exit -} +test_set_prereq NOT_EXPENSIVE +test -n "$GIT_NOTES_TIMING_TESTS" && test_set_prereq EXPENSIVE +test -x /usr/bin/time && test_set_prereq USR_BIN_TIME create_repo () { number_of_commits=$1 @@ -102,17 +100,27 @@ time_notes () { done } -for count in 10 100 1000 10000; do +do_tests () { + pr=$1 + count=$2 + + test_expect_success $pr 'setup / mkdir' ' + mkdir $count && + cd $count + ' - mkdir $count - (cd $count; + test_expect_success $pr "setup $count" "create_repo $count" - test_expect_success "setup $count" "create_repo $count" + test_expect_success $pr 'notes work' "test_notes $count" - test_expect_success 'notes work' "test_notes $count" + test_expect_success USR_BIN_TIME,$pr 'notes timing with /usr/bin/time' "time_notes 100" + + test_expect_success $pr 'teardown / cd ..' 'cd ..' +} - test_expect_success 'notes timing' "time_notes 100" - ) +do_tests NOT_EXPENSIVE 10 +for count in 100 1000 10000; do + do_tests EXPENSIVE $count done test_done diff --git a/t/t3306-notes-prune.sh b/t/t3306-notes-prune.sh index b4554041b4..c4282179b3 100755 --- a/t/t3306-notes-prune.sh +++ b/t/t3306-notes-prune.sh @@ -67,7 +67,7 @@ test_expect_success 'remove some commits' ' test_expect_success 'verify that commits are gone' ' - ! git cat-file -p 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && + test_must_fail git cat-file -p 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && git cat-file -p 08341ad9e94faa089d60fd3f523affb25c6da189 && git cat-file -p ab5f302035f2e7aaf04265f08b42034c23256e1f ' @@ -106,7 +106,7 @@ test_expect_success 'prune notes' ' test_expect_success 'verify that notes are gone' ' - ! 
git notes show 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && + test_must_fail git notes show 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && git notes show 08341ad9e94faa089d60fd3f523affb25c6da189 && git notes show ab5f302035f2e7aaf04265f08b42034c23256e1f ' @@ -130,8 +130,8 @@ test_expect_success 'prune -v notes' ' test_expect_success 'verify that notes are gone' ' - ! git notes show 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && - ! git notes show 08341ad9e94faa089d60fd3f523affb25c6da189 && + test_must_fail git notes show 5ee1c35e83ea47cd3cc4f8cbee0568915fbbbd29 && + test_must_fail git notes show 08341ad9e94faa089d60fd3f523affb25c6da189 && git notes show ab5f302035f2e7aaf04265f08b42034c23256e1f ' diff --git a/t/t3400-rebase.sh b/t/t3400-rebase.sh index a19aeb6441..349eebd542 100755 --- a/t/t3400-rebase.sh +++ b/t/t3400-rebase.sh @@ -153,7 +153,8 @@ test_expect_success 'setup: recover' ' test_expect_success 'Show verbose error when HEAD could not be detached' ' >B && test_must_fail git rebase topic 2>output.err >output.out && - grep "Untracked working tree file .B. would be overwritten" output.err + grep "The following untracked working tree files would be overwritten by checkout:" output.err && + grep B output.err ' rm -f B diff --git a/t/t3402-rebase-merge.sh b/t/t3402-rebase-merge.sh index 7b7d07269a..2bea65634a 100755 --- a/t/t3402-rebase-merge.sh +++ b/t/t3402-rebase-merge.sh @@ -74,6 +74,15 @@ test_expect_success 'rebase the other way' ' git rebase --merge side ' +test_expect_success 'rebase -Xtheirs' ' + git checkout -b conflicting master~2 && + echo "AB $T" >> original && + git commit -mconflicting original && + git rebase -Xtheirs master && + grep AB original && + ! 
grep 11 original +' + test_expect_success 'merge and rebase should match' ' git diff-tree -r test-rebase test-merge >difference && if test -s difference diff --git a/t/t3404-rebase-interactive.sh b/t/t3404-rebase-interactive.sh index 9f03ce699e..7d20a74c5c 100755 --- a/t/t3404-rebase-interactive.sh +++ b/t/t3404-rebase-interactive.sh @@ -64,6 +64,67 @@ test_expect_success 'setup' ' done ' +# "exec" commands are ran with the user shell by default, but this may +# be non-POSIX. For example, if SHELL=zsh then ">file" doesn't work +# to create a file. Unseting SHELL avoids such non-portable behavior +# in tests. +SHELL= + +test_expect_success 'rebase -i with the exec command' ' + git checkout master && + ( + FAKE_LINES="1 exec_>touch-one + 2 exec_>touch-two exec_false exec_>touch-three + 3 4 exec_>\"touch-file__name_with_spaces\";_>touch-after-semicolon 5" && + export FAKE_LINES && + test_must_fail git rebase -i A + ) && + test_path_is_file touch-one && + test_path_is_file touch-two && + test_path_is_missing touch-three " (should have stopped before)" && + test $(git rev-parse C) = $(git rev-parse HEAD) || { + echo "Stopped at wrong revision:" + echo "($(git describe --tags HEAD) instead of C)" + false + } && + git rebase --continue && + test_path_is_file touch-three && + test_path_is_file "touch-file name with spaces" && + test_path_is_file touch-after-semicolon && + test $(git rev-parse master) = $(git rev-parse HEAD) || { + echo "Stopped at wrong revision:" + echo "($(git describe --tags HEAD) instead of master)" + false + } && + rm -f touch-* +' + +test_expect_success 'rebase -i with the exec command runs from tree root' ' + git checkout master && + mkdir subdir && (cd subdir && + FAKE_LINES="1 exec_>touch-subdir" \ + git rebase -i HEAD^ + ) && + test_path_is_file touch-subdir && + rm -fr subdir +' + +test_expect_success 'rebase -i with the exec command checks tree cleanness' ' + git checkout master && + ( + FAKE_LINES="exec_echo_foo_>file1 1" && + export 
FAKE_LINES && + test_must_fail git rebase -i HEAD^ + ) && + test $(git rev-parse master^) = $(git rev-parse HEAD) || { + echo "Stopped at wrong revision:" + echo "($(git describe --tags HEAD) instead of master^)" + false + } && + git reset --hard && + git rebase --continue +' + test_expect_success 'no changes are a nop' ' git checkout branch2 && git rebase -i F && @@ -143,16 +204,17 @@ test_expect_success 'abort' ' git rebase --abort && test $(git rev-parse new-branch1) = $(git rev-parse HEAD) && test "$(git symbolic-ref -q HEAD)" = "refs/heads/branch1" && - ! test -d .git/rebase-merge + test_path_is_missing .git/rebase-merge ' test_expect_success 'abort with error when new base cannot be checked out' ' git rm --cached file1 && git commit -m "remove file in base" && test_must_fail git rebase -i master > output 2>&1 && - grep "Untracked working tree file .file1. would be overwritten" \ + grep "The following untracked working tree files would be overwritten by checkout:" \ output && - ! test -d .git/rebase-merge && + grep "file1" output && + test_path_is_missing .git/rebase-merge && git reset --hard HEAD^ ' diff --git a/t/t3407-rebase-abort.sh b/t/t3407-rebase-abort.sh index 2999e78937..fbb3f2e0df 100755 --- a/t/t3407-rebase-abort.sh +++ b/t/t3407-rebase-abort.sh @@ -38,7 +38,7 @@ testrebase() { # Clean up the state from the previous one git reset --hard pre-rebase && test_must_fail git rebase$type master && - test -d "$dotest" && + test_path_is_dir "$dotest" && git rebase --abort && test $(git rev-parse to-rebase) = $(git rev-parse pre-rebase) && test ! 
-d "$dotest" @@ -49,7 +49,7 @@ testrebase() { # Clean up the state from the previous one git reset --hard pre-rebase && test_must_fail git rebase$type master && - test -d "$dotest" && + test_path_is_dir "$dotest" && test_must_fail git rebase --skip && test $(git rev-parse HEAD) = $(git rev-parse master) && git rebase --abort && @@ -62,7 +62,7 @@ testrebase() { # Clean up the state from the previous one git reset --hard pre-rebase && test_must_fail git rebase$type master && - test -d "$dotest" && + test_path_is_dir "$dotest" && echo c > a && echo d >> a && git add a && diff --git a/t/t3410-rebase-preserve-dropped-merges.sh b/t/t3410-rebase-preserve-dropped-merges.sh index c49143a1a4..6f73b95558 100755 --- a/t/t3410-rebase-preserve-dropped-merges.sh +++ b/t/t3410-rebase-preserve-dropped-merges.sh @@ -43,11 +43,11 @@ test_expect_success 'setup' ' # G2 = same changes as G test_expect_success 'skip same-resolution merges with -p' ' git checkout H && - ! git merge E && + test_must_fail git merge E && test_commit L file1 23 && git checkout I && test_commit G2 file1 3 && - ! git merge E && + test_must_fail git merge E && test_commit J file1 23 && test_commit K file7 file7 && git rebase -i -p L && @@ -65,11 +65,11 @@ test_expect_success 'skip same-resolution merges with -p' ' # G2 = different changes as G test_expect_success 'keep different-resolution merges with -p' ' git checkout H && - ! git merge E && + test_must_fail git merge E && test_commit L2 file1 23 && git checkout I && test_commit G3 file1 4 && - ! 
git merge E && + test_must_fail git merge E && test_commit J2 file1 24 && test_commit K2 file7 file7 && test_must_fail git rebase -i -p L2 && diff --git a/t/t3415-rebase-autosquash.sh b/t/t3415-rebase-autosquash.sh index b63f4e2d67..37cb89ab53 100755 --- a/t/t3415-rebase-autosquash.sh +++ b/t/t3415-rebase-autosquash.sh @@ -21,38 +21,62 @@ test_expect_success setup ' git tag base ' -test_expect_success 'auto fixup' ' +test_auto_fixup() { git reset --hard base && echo 1 >file1 && git add -u && test_tick && git commit -m "fixup! first" - git tag final-fixup && + git tag $1 && test_tick && - git rebase --autosquash -i HEAD^^^ && + git rebase $2 -i HEAD^^^ && git log --oneline >actual && test 3 = $(wc -l <actual) && - git diff --exit-code final-fixup && + git diff --exit-code $1 && test 1 = "$(git cat-file blob HEAD^:file1)" && test 1 = $(git cat-file commit HEAD^ | grep first | wc -l) +} + +test_expect_success 'auto fixup (option)' ' + test_auto_fixup final-fixup-option --autosquash +' + +test_expect_success 'auto fixup (config)' ' + git config rebase.autosquash true && + test_auto_fixup final-fixup-config-true && + test_must_fail test_auto_fixup fixup-config-true-no --no-autosquash && + git config rebase.autosquash false && + test_must_fail test_auto_fixup final-fixup-config-false ' -test_expect_success 'auto squash' ' +test_auto_squash() { git reset --hard base && echo 1 >file1 && git add -u && test_tick && git commit -m "squash! 
first" - git tag final-squash && + git tag $1 && test_tick && - git rebase --autosquash -i HEAD^^^ && + git rebase $2 -i HEAD^^^ && git log --oneline >actual && test 3 = $(wc -l <actual) && - git diff --exit-code final-squash && + git diff --exit-code $1 && test 1 = "$(git cat-file blob HEAD^:file1)" && test 2 = $(git cat-file commit HEAD^ | grep first | wc -l) +} + +test_expect_success 'auto squash (option)' ' + test_auto_squash final-squash --autosquash +' + +test_expect_success 'auto squash (config)' ' + git config rebase.autosquash true && + test_auto_squash final-squash-config-true && + test_must_fail test_auto_squash squash-config-true-no --no-autosquash && + git config rebase.autosquash false && + test_must_fail test_auto_squash final-squash-config-false ' test_expect_success 'misspelled auto squash' ' diff --git a/t/t3505-cherry-pick-empty.sh b/t/t3505-cherry-pick-empty.sh index e51e505a9f..c10b28cf57 100755 --- a/t/t3505-cherry-pick-empty.sh +++ b/t/t3505-cherry-pick-empty.sh @@ -13,12 +13,30 @@ test_expect_success setup ' git checkout -b empty-branch && test_tick && - git commit --allow-empty -m "empty" + git commit --allow-empty -m "empty" && + + echo third >> file1 && + git add file1 && + test_tick && + git commit --allow-empty-message -m "" ' test_expect_success 'cherry-pick an empty commit' ' git checkout master && { + git cherry-pick empty-branch^ + test "$?" = 1 + } +' + +test_expect_success 'index lockfile was removed' ' + + test ! -f .git/index.lock + +' + +test_expect_success 'cherry-pick a commit with an empty message' ' + git checkout master && { git cherry-pick empty-branch test "$?" 
= 1 } diff --git a/t/t3507-cherry-pick-conflict.sh b/t/t3507-cherry-pick-conflict.sh index e25cf8039a..607bf25d8f 100755 --- a/t/t3507-cherry-pick-conflict.sh +++ b/t/t3507-cherry-pick-conflict.sh @@ -38,6 +38,26 @@ test_expect_success 'failed cherry-pick does not advance HEAD' ' test "$head" = "$newhead" ' +test_expect_success 'advice from failed cherry-pick' " + git checkout -f initial^0 && + git read-tree -u --reset HEAD && + git clean -d -f -f -q -x && + + git update-index --refresh && + git diff-index --exit-code HEAD && + + picked=\$(git rev-parse --short picked) && + cat <<-EOF >expected && + error: could not apply \$picked... picked + hint: after resolving the conflicts, mark the corrected paths + hint: with 'git add <paths>' or 'git rm <paths>' + hint: and commit the result with 'git commit -c \$picked' + EOF + test_must_fail git cherry-pick picked 2>actual && + + test_cmp expected actual +" + test_expect_success 'failed cherry-pick produces dirty index' ' git checkout -f initial^0 && diff --git a/t/t3508-cherry-pick-many-commits.sh b/t/t3508-cherry-pick-many-commits.sh index f90ed3da3e..8e09fd0319 100755 --- a/t/t3508-cherry-pick-many-commits.sh +++ b/t/t3508-cherry-pick-many-commits.sh @@ -4,6 +4,18 @@ test_description='test cherry-picking many commits' . 
./test-lib.sh +check_head_differs_from() { + head=$(git rev-parse --verify HEAD) && + arg=$(git rev-parse --verify "$1") && + test "$head" != "$arg" +} + +check_head_equals() { + head=$(git rev-parse --verify HEAD) && + arg=$(git rev-parse --verify "$1") && + test "$head" = "$arg" +} + test_expect_success setup ' echo first > file1 && git add file1 && @@ -23,13 +35,55 @@ test_expect_success setup ' ' test_expect_success 'cherry-pick first..fourth works' ' + cat <<-\EOF >expected && + [master OBJID] second + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + [master OBJID] third + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + [master OBJID] fourth + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + EOF + + git checkout -f master && + git reset --hard first && + test_tick && + git cherry-pick first..fourth >actual && + git diff --quiet other && + git diff --quiet HEAD other && + + sed -e "s/$_x05[0-9a-f][0-9a-f]/OBJID/" <actual >actual.fuzzy && + test_cmp expected actual.fuzzy && + check_head_differs_from fourth +' + +test_expect_success 'cherry-pick --strategy resolve first..fourth works' ' + cat <<-\EOF >expected && + Trying simple merge. + [master OBJID] second + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + Trying simple merge. + [master OBJID] third + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + Trying simple merge. 
+ [master OBJID] fourth + Author: A U Thor <author@example.com> + 1 files changed, 1 insertions(+), 0 deletions(-) + EOF + git checkout -f master && git reset --hard first && test_tick && - git cherry-pick first..fourth && + git cherry-pick --strategy resolve first..fourth >actual && git diff --quiet other && git diff --quiet HEAD other && - test "$(git rev-parse --verify HEAD)" != "$(git rev-parse --verify fourth)" + sed -e "s/$_x05[0-9a-f][0-9a-f]/OBJID/" <actual >actual.fuzzy && + test_cmp expected actual.fuzzy && + check_head_differs_from fourth ' test_expect_success 'cherry-pick --ff first..fourth works' ' @@ -39,7 +93,7 @@ test_expect_success 'cherry-pick --ff first..fourth works' ' git cherry-pick --ff first..fourth && git diff --quiet other && git diff --quiet HEAD other && - test "$(git rev-parse --verify HEAD)" = "$(git rev-parse --verify fourth)" + check_head_equals fourth ' test_expect_success 'cherry-pick -n first..fourth works' ' @@ -89,7 +143,7 @@ test_expect_success 'cherry-pick -3 fourth works' ' git cherry-pick -3 fourth && git diff --quiet other && git diff --quiet HEAD other && - test "$(git rev-parse --verify HEAD)" != "$(git rev-parse --verify fourth)" + check_head_differs_from fourth ' test_expect_success 'cherry-pick --stdin works' ' @@ -99,7 +153,7 @@ test_expect_success 'cherry-pick --stdin works' ' git rev-list --reverse first..fourth | git cherry-pick --stdin && git diff --quiet other && git diff --quiet HEAD other && - test "$(git rev-parse --verify HEAD)" != "$(git rev-parse --verify fourth)" + check_head_differs_from fourth ' test_done diff --git a/t/t3509-cherry-pick-merge-df.sh b/t/t3509-cherry-pick-merge-df.sh new file mode 100755 index 0000000000..a5ccdbf8fc --- /dev/null +++ b/t/t3509-cherry-pick-merge-df.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +test_description='Test cherry-pick with directory/file conflicts' +. 
./test-lib.sh + +test_expect_success SYMLINKS 'Setup rename across paths each below D/F conflicts' ' + mkdir a && + >a/f && + git add a && + git commit -m a && + + mkdir b && + ln -s ../a b/a && + git add b && + git commit -m b && + + git checkout -b branch && + rm b/a && + mv a b/a && + ln -s b/a a && + git add . && + git commit -m swap && + + >f1 && + git add f1 && + git commit -m f1 +' + +test_expect_success SYMLINKS 'Cherry-pick succeeds with rename across D/F conflicts' ' + git reset --hard && + git checkout master^0 && + git cherry-pick branch +' + +test_done diff --git a/t/t3600-rm.sh b/t/t3600-rm.sh index b514cbb606..b26cabd571 100755 --- a/t/t3600-rm.sh +++ b/t/t3600-rm.sh @@ -28,22 +28,6 @@ embedded' && git commit -m 'add files with tabs and newlines' " -# Determine rm behavior -# Later we will try removing an unremovable path to make sure -# git rm barfs, but if the test is run as root that cannot be -# arranged. -: >test-file -chmod a-w . -rm -f test-file 2>/dev/null -if test -f test-file -then - test_set_prereq RO_DIR -else - skip_all='skipping removal failure test (perhaps running as root?)' -fi -chmod 775 . 
-rm -f test-file - test_expect_success \ 'Pre-check that foo exists and is in index before git rm foo' \ '[ -f foo ] && git ls-files --error-unmatch foo' diff --git a/t/t3700-add.sh b/t/t3700-add.sh index 7d7140db38..ec7108358e 100755 --- a/t/t3700-add.sh +++ b/t/t3700-add.sh @@ -179,7 +179,7 @@ test_expect_success 'git add --refresh' ' test -z "`git diff-index HEAD -- foo`" ' -test_expect_success POSIXPERM 'git add should fail atomically upon an unreadable file' ' +test_expect_success POSIXPERM,SANITY 'git add should fail atomically upon an unreadable file' ' git reset --hard && date >foo1 && date >foo2 && @@ -190,7 +190,7 @@ test_expect_success POSIXPERM 'git add should fail atomically upon an unreadable rm -f foo2 -test_expect_success POSIXPERM 'git add --ignore-errors' ' +test_expect_success POSIXPERM,SANITY 'git add --ignore-errors' ' git reset --hard && date >foo1 && date >foo2 && @@ -201,7 +201,7 @@ test_expect_success POSIXPERM 'git add --ignore-errors' ' rm -f foo2 -test_expect_success POSIXPERM 'git add (add.ignore-errors)' ' +test_expect_success POSIXPERM,SANITY 'git add (add.ignore-errors)' ' git config add.ignore-errors 1 && git reset --hard && date >foo1 && @@ -212,7 +212,7 @@ test_expect_success POSIXPERM 'git add (add.ignore-errors)' ' ' rm -f foo2 -test_expect_success POSIXPERM 'git add (add.ignore-errors = false)' ' +test_expect_success POSIXPERM,SANITY 'git add (add.ignore-errors = false)' ' git config add.ignore-errors 0 && git reset --hard && date >foo1 && @@ -223,7 +223,7 @@ test_expect_success POSIXPERM 'git add (add.ignore-errors = false)' ' ' rm -f foo2 -test_expect_success POSIXPERM '--no-ignore-errors overrides config' ' +test_expect_success POSIXPERM,SANITY '--no-ignore-errors overrides config' ' git config add.ignore-errors 1 && git reset --hard && date >foo1 && diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index 7ad8465f8f..d6327e7c74 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ 
-2,22 +2,20 @@ test_description='add -i basic tests' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-prereq-FILEMODE.sh -if ! test_have_prereq PERL; then - skip_all='skipping git add -i tests, perl not available' - test_done -fi - -test_expect_success 'setup (initial)' ' +test_expect_success PERL 'setup (initial)' ' echo content >file && git add file && echo more >>file && echo lines >>file ' -test_expect_success 'status works (initial)' ' +test_expect_success PERL 'status works (initial)' ' git add -i </dev/null >output && grep "+1/-0 *+2/-0 file" output ' + +test_expect_success PERL 'setup expected' ' cat >expected <<EOF new file mode 100644 index 0000000..d95f3ad @@ -26,19 +24,21 @@ index 0000000..d95f3ad @@ -0,0 +1 @@ +content EOF -test_expect_success 'diff works (initial)' ' +' + +test_expect_success PERL 'diff works (initial)' ' (echo d; echo 1) | git add -i >output && sed -ne "/new file/,/content/p" <output >diff && test_cmp expected diff ' -test_expect_success 'revert works (initial)' ' +test_expect_success PERL 'revert works (initial)' ' git add file && (echo r; echo 1) | git add -i && git ls-files >output && ! grep . 
output ' -test_expect_success 'setup (commit)' ' +test_expect_success PERL 'setup (commit)' ' echo baseline >file && git add file && git commit -m commit && @@ -47,10 +47,12 @@ test_expect_success 'setup (commit)' ' echo more >>file && echo lines >>file ' -test_expect_success 'status works (commit)' ' +test_expect_success PERL 'status works (commit)' ' git add -i </dev/null >output && grep "+1/-0 *+2/-0 file" output ' + +test_expect_success PERL 'setup expected' ' cat >expected <<EOF index 180b47c..b6f2c08 100644 --- a/file @@ -59,60 +61,79 @@ index 180b47c..b6f2c08 100644 baseline +content EOF -test_expect_success 'diff works (commit)' ' +' + +test_expect_success PERL 'diff works (commit)' ' (echo d; echo 1) | git add -i >output && sed -ne "/^index/,/content/p" <output >diff && test_cmp expected diff ' -test_expect_success 'revert works (commit)' ' +test_expect_success PERL 'revert works (commit)' ' git add file && (echo r; echo 1) | git add -i && git add -i </dev/null >output && grep "unchanged *+3/-0 file" output ' + +test_expect_success PERL 'setup expected' ' cat >expected <<EOF EOF -cat >fake_editor.sh <<EOF -EOF -chmod a+x fake_editor.sh -test_set_editor "$(pwd)/fake_editor.sh" -test_expect_success 'dummy edit works' ' +' + +test_expect_success PERL 'setup fake editor' ' + cat >fake_editor.sh <<EOF + EOF + chmod a+x fake_editor.sh && + test_set_editor "$(pwd)/fake_editor.sh" && +' + +test_expect_success PERL 'dummy edit works' ' (echo e; echo a) | git add -p && git diff > diff && test_cmp expected diff ' +test_expect_success PERL 'setup patch' ' cat >patch <<EOF @@ -1,1 +1,4 @@ this +patch --doesn't +-does not apply EOF -echo "#!$SHELL_PATH" >fake_editor.sh -cat >>fake_editor.sh <<\EOF +' + +test_expect_success PERL 'setup fake editor' ' + echo "#!$SHELL_PATH" >fake_editor.sh && + cat >>fake_editor.sh <<\EOF && mv -f "$1" oldpatch && mv -f patch "$1" EOF -chmod a+x fake_editor.sh -test_set_editor "$(pwd)/fake_editor.sh" -test_expect_success 'bad edit 
rejected' ' + chmod a+x fake_editor.sh && + test_set_editor "$(pwd)/fake_editor.sh" +' + +test_expect_success PERL 'bad edit rejected' ' git reset && (echo e; echo n; echo d) | git add -p >output && grep "hunk does not apply" output ' +test_expect_success PERL 'setup patch' ' cat >patch <<EOF this patch is garbage EOF -test_expect_success 'garbage edit rejected' ' +' + +test_expect_success PERL 'garbage edit rejected' ' git reset && (echo e; echo n; echo d) | git add -p >output && grep "hunk does not apply" output ' +test_expect_success PERL 'setup patch' ' cat >patch <<EOF @@ -1,0 +1,0 @@ baseline @@ -120,6 +141,9 @@ cat >patch <<EOF +newcontent +lines EOF +' + +test_expect_success PERL 'setup expected' ' cat >expected <<EOF diff --git a/file b/file index b5dd6c9..f910ae9 100644 @@ -132,13 +156,15 @@ index b5dd6c9..f910ae9 100644 +more lines EOF -test_expect_success 'real edit works' ' +' + +test_expect_success PERL 'real edit works' ' (echo e; echo n; echo d) | git add -p && git diff >output && test_cmp expected output ' -test_expect_success 'skip files similarly as commit -a' ' +test_expect_success PERL 'skip files similarly as commit -a' ' git reset && echo file >.gitignore && echo changed >file && @@ -152,14 +178,7 @@ test_expect_success 'skip files similarly as commit -a' ' ' rm -f .gitignore -if test "$(git config --bool core.filemode)" = false -then - say '# skipping filemode tests (filesystem does not properly support modes)' -else - test_set_prereq FILEMODE -fi - -test_expect_success FILEMODE 'patch does not affect mode' ' +test_expect_success PERL,FILEMODE 'patch does not affect mode' ' git reset --hard && echo content >>file && chmod +x file && @@ -168,7 +187,7 @@ test_expect_success FILEMODE 'patch does not affect mode' ' git diff file | grep "new mode" ' -test_expect_success FILEMODE 'stage mode but not hunk' ' +test_expect_success PERL,FILEMODE 'stage mode but not hunk' ' git reset --hard && echo content >>file && chmod +x file && @@ -178,7 +197,7 @@ 
test_expect_success FILEMODE 'stage mode but not hunk' ' ' -test_expect_success FILEMODE 'stage mode and hunk' ' +test_expect_success PERL,FILEMODE 'stage mode and hunk' ' git reset --hard && echo content >>file && chmod +x file && @@ -190,13 +209,14 @@ test_expect_success FILEMODE 'stage mode and hunk' ' # end of tests disabled when filemode is not usable -test_expect_success 'setup again' ' +test_expect_success PERL 'setup again' ' git reset --hard && test_chmod +x file && echo content >>file ' # Write the patch file with a new line at the top and bottom +test_expect_success PERL 'setup patch' ' cat >patch <<EOF index 180b47c..b6f2c08 100644 --- a/file @@ -207,7 +227,10 @@ index 180b47c..b6f2c08 100644 content +lastline EOF +' + # Expected output, similar to the patch but w/ diff at the top +test_expect_success PERL 'setup expected' ' cat >expected <<EOF diff --git a/file b/file index b6f2c08..61b9053 100755 @@ -219,8 +242,10 @@ index b6f2c08..61b9053 100755 content +lastline EOF +' + # Test splitting the first patch, then adding both -test_expect_success 'add first line works' ' +test_expect_success PERL 'add first line works' ' git commit -am "clear local changes" && git apply patch && (echo s; echo y; echo y) | git add -p file && @@ -228,6 +253,7 @@ test_expect_success 'add first line works' ' test_cmp expected diff ' +test_expect_success PERL 'setup expected' ' cat >expected <<EOF diff --git a/non-empty b/non-empty deleted file mode 100644 @@ -237,7 +263,9 @@ index d95f3ad..0000000 @@ -1 +0,0 @@ -content EOF -test_expect_success 'deleting a non-empty file' ' +' + +test_expect_success PERL 'deleting a non-empty file' ' git reset --hard && echo content >non-empty && git add non-empty && @@ -248,13 +276,15 @@ test_expect_success 'deleting a non-empty file' ' test_cmp expected diff ' +test_expect_success PERL 'setup expected' ' cat >expected <<EOF diff --git a/empty b/empty deleted file mode 100644 index e69de29..0000000 EOF +' -test_expect_success 'deleting an 
empty file' ' +test_expect_success PERL 'deleting an empty file' ' git reset --hard && > empty && git add empty && diff --git a/t/t3902-quoted.sh b/t/t3902-quoted.sh index 147e634cd6..7d49469841 100755 --- a/t/t3902-quoted.sh +++ b/t/t3902-quoted.sh @@ -15,11 +15,13 @@ LF=' DQ='"' echo foo 2>/dev/null > "Name and an${HT}HT" -test -f "Name and an${HT}HT" || { - # since FAT/NTFS does not allow tabs in filenames, skip this test - skip_all='Your filesystem does not allow tabs in filenames, test skipped.' - test_done -} +if ! test -f "Name and an${HT}HT" +then + # FAT/NTFS does not allow tabs in filenames + say 'Your filesystem does not allow tabs in filenames' +else + test_set_prereq TABS_IN_FILENAMES +fi for_each_name () { for name in \ @@ -31,7 +33,7 @@ for_each_name () { done } -test_expect_success setup ' +test_expect_success TABS_IN_FILENAMES 'setup' ' mkdir "$FN" && for_each_name "echo initial >\"\$name\"" @@ -45,6 +47,7 @@ test_expect_success setup ' ' +test_expect_success TABS_IN_FILENAMES 'setup expected files' ' cat >expect.quoted <<\EOF Name "Name and a\nLF" @@ -72,75 +75,76 @@ With SP in it 濱野/file 濱野純 EOF +' -test_expect_success 'check fully quoted output from ls-files' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from ls-files' ' git ls-files >current && test_cmp expect.quoted current ' -test_expect_success 'check fully quoted output from diff-files' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from diff-files' ' git diff --name-only >current && test_cmp expect.quoted current ' -test_expect_success 'check fully quoted output from diff-index' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from diff-index' ' git diff --name-only HEAD >current && test_cmp expect.quoted current ' -test_expect_success 'check fully quoted output from diff-tree' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from diff-tree' ' git diff --name-only HEAD^ HEAD >current && test_cmp expect.quoted 
current ' -test_expect_success 'check fully quoted output from ls-tree' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from ls-tree' ' git ls-tree --name-only -r HEAD >current && test_cmp expect.quoted current ' -test_expect_success 'setting core.quotepath' ' +test_expect_success TABS_IN_FILENAMES 'setting core.quotepath' ' git config --bool core.quotepath false ' -test_expect_success 'check fully quoted output from ls-files' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from ls-files' ' git ls-files >current && test_cmp expect.raw current ' -test_expect_success 'check fully quoted output from diff-files' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from diff-files' ' git diff --name-only >current && test_cmp expect.raw current ' -test_expect_success 'check fully quoted output from diff-index' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from diff-index' ' git diff --name-only HEAD >current && test_cmp expect.raw current ' -test_expect_success 'check fully quoted output from diff-tree' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from diff-tree' ' git diff --name-only HEAD^ HEAD >current && test_cmp expect.raw current ' -test_expect_success 'check fully quoted output from ls-tree' ' +test_expect_success TABS_IN_FILENAMES 'check fully quoted output from ls-tree' ' git ls-tree --name-only -r HEAD >current && test_cmp expect.raw current diff --git a/t/t3903-stash.sh b/t/t3903-stash.sh index 62e208aadd..d99f27a12f 100755 --- a/t/t3903-stash.sh +++ b/t/t3903-stash.sh @@ -378,4 +378,116 @@ test_expect_failure 'stash file to directory' ' test foo = "$(cat file/file)" ' +test_expect_success 'stash branch - no stashes on stack, stash-like argument' ' + git stash clear && + test_when_finished "git reset --hard HEAD" && + git reset --hard && + echo foo >> file && + STASH_ID=$(git stash create) && + git reset --hard && + git stash branch stash-branch ${STASH_ID} && + 
test_when_finished "git reset --hard HEAD && git checkout master && git branch -D stash-branch" && + test $(git ls-files --modified | wc -l) -eq 1 +' + +test_expect_success 'stash branch - stashes on stack, stash-like argument' ' + git stash clear && + test_when_finished "git reset --hard HEAD" && + git reset --hard && + echo foo >> file && + git stash && + test_when_finished "git stash drop" && + echo bar >> file && + STASH_ID=$(git stash create) && + git reset --hard && + git stash branch stash-branch ${STASH_ID} && + test_when_finished "git reset --hard HEAD && git checkout master && git branch -D stash-branch" && + test $(git ls-files --modified | wc -l) -eq 1 +' + +test_expect_success 'stash show - stashes on stack, stash-like argument' ' + git stash clear && + test_when_finished "git reset --hard HEAD" && + git reset --hard && + echo foo >> file && + git stash && + test_when_finished "git stash drop" && + echo bar >> file && + STASH_ID=$(git stash create) && + git reset --hard && + git stash show ${STASH_ID} +' +test_expect_success 'stash show - no stashes on stack, stash-like argument' ' + git stash clear && + test_when_finished "git reset --hard HEAD" && + git reset --hard && + echo foo >> file && + STASH_ID=$(git stash create) && + git reset --hard && + git stash show ${STASH_ID} +' + +test_expect_success 'stash drop - fail early if specified stash is not a stash reference' ' + git stash clear && + test_when_finished "git reset --hard HEAD && git stash clear" && + git reset --hard && + echo foo > file && + git stash && + echo bar > file && + git stash && + test_must_fail git stash drop $(git rev-parse stash@{0}) && + git stash pop && + test bar = "$(cat file)" && + git reset --hard HEAD +' + +test_expect_success 'stash pop - fail early if specified stash is not a stash reference' ' + git stash clear && + test_when_finished "git reset --hard HEAD && git stash clear" && + git reset --hard && + echo foo > file && + git stash && + echo bar > file && + git 
stash && + test_must_fail git stash pop $(git rev-parse stash@{0}) && + git stash pop && + test bar = "$(cat file)" && + git reset --hard HEAD +' + +test_expect_success 'ref with non-existant reflog' ' + git stash clear && + echo bar5 > file && + echo bar6 > file2 && + git add file2 && + git stash && + ! "git rev-parse --quiet --verify does-not-exist" && + test_must_fail git stash drop does-not-exist && + test_must_fail git stash drop does-not-exist@{0} && + test_must_fail git stash pop does-not-exist && + test_must_fail git stash pop does-not-exist@{0} && + test_must_fail git stash apply does-not-exist && + test_must_fail git stash apply does-not-exist@{0} && + test_must_fail git stash show does-not-exist && + test_must_fail git stash show does-not-exist@{0} && + test_must_fail git stash branch tmp does-not-exist && + test_must_fail git stash branch tmp does-not-exist@{0} && + git stash drop +' + +test_expect_success 'invalid ref of the form stash@{n}, n >= N' ' + git stash clear && + test_must_fail git stash drop stash@{0} && + echo bar5 > file && + echo bar6 > file2 && + git add file2 && + git stash && + test_must_fail git drop stash@{1} && + test_must_fail git pop stash@{1} && + test_must_fail git apply stash@{1} && + test_must_fail git show stash@{1} && + test_must_fail git branch tmp stash@{1} && + git stash drop +' + test_done diff --git a/t/t3904-stash-patch.sh b/t/t3904-stash-patch.sh index f37e3bc6ec..d1819ca23a 100755 --- a/t/t3904-stash-patch.sh +++ b/t/t3904-stash-patch.sh @@ -3,7 +3,7 @@ test_description='git checkout --patch' . 
./lib-patch-mode.sh -test_expect_success 'setup' ' +test_expect_success PERL 'setup' ' mkdir dir && echo parent > dir/foo && echo dummy > bar && @@ -19,14 +19,14 @@ test_expect_success 'setup' ' # note: bar sorts before dir, so the first 'n' is always to skip 'bar' -test_expect_success 'saying "n" does nothing' ' +test_expect_success PERL 'saying "n" does nothing' ' set_state dir/foo work index (echo n; echo n) | test_must_fail git stash save -p && verify_state dir/foo work index && verify_saved_state bar ' -test_expect_success 'git stash -p' ' +test_expect_success PERL 'git stash -p' ' (echo n; echo y) | git stash save -p && verify_state dir/foo head index && verify_saved_state bar && @@ -36,7 +36,7 @@ test_expect_success 'git stash -p' ' verify_state bar dummy dummy ' -test_expect_success 'git stash -p --no-keep-index' ' +test_expect_success PERL 'git stash -p --no-keep-index' ' set_state dir/foo work index && set_state bar bar_work bar_index && (echo n; echo y) | git stash save -p --no-keep-index && @@ -48,7 +48,7 @@ test_expect_success 'git stash -p --no-keep-index' ' verify_state bar dummy bar_index ' -test_expect_success 'none of this moved HEAD' ' +test_expect_success PERL 'none of this moved HEAD' ' verify_saved_head ' diff --git a/t/t4004-diff-rename-symlink.sh b/t/t4004-diff-rename-symlink.sh index 1a09e8db40..92a65f4852 100755 --- a/t/t4004-diff-rename-symlink.sh +++ b/t/t4004-diff-rename-symlink.sh @@ -12,13 +12,7 @@ by an edit for them. . ./test-lib.sh . "$TEST_DIRECTORY"/diff-lib.sh -if ! test_have_prereq SYMLINKS -then - skip_all='Symbolic links not supported, skipping tests.' 
- test_done -fi - -test_expect_success \ +test_expect_success SYMLINKS \ 'prepare reference tree' \ 'echo xyzzy | tr -d '\\\\'012 >yomin && ln -s xyzzy frotz && @@ -26,7 +20,7 @@ test_expect_success \ tree=$(git write-tree) && echo $tree' -test_expect_success \ +test_expect_success SYMLINKS \ 'prepare work tree' \ 'mv frotz rezrov && rm -f yomin && @@ -40,8 +34,9 @@ test_expect_success \ # rezrov and nitfol are rename/copy of frotz and bozbar should be # a new creation. -GIT_DIFF_OPTS=--unified=0 git diff-index -M -p $tree >current -cat >expected <<\EOF +test_expect_success SYMLINKS 'setup diff output' " + GIT_DIFF_OPTS=--unified=0 git diff-index -M -p $tree >current && + cat >expected <<\EOF diff --git a/bozbar b/bozbar new file mode 120000 --- /dev/null @@ -65,8 +60,9 @@ deleted file mode 100644 -xyzzy \ No newline at end of file EOF +" -test_expect_success \ +test_expect_success SYMLINKS \ 'validate diff output' \ 'compare_diff_patch current expected' diff --git a/t/t4011-diff-symlink.sh b/t/t4011-diff-symlink.sh index 918a21a2f4..6f6948925f 100755 --- a/t/t4011-diff-symlink.sh +++ b/t/t4011-diff-symlink.sh @@ -9,12 +9,6 @@ test_description='Test diff of symlinks. . ./test-lib.sh . "$TEST_DIRECTORY"/diff-lib.sh -if ! test_have_prereq SYMLINKS -then - skip_all='Symbolic links not supported, skipping tests.' 
- test_done -fi - cat > expected << EOF diff --git a/frotz b/frotz new file mode 120000 @@ -26,7 +20,7 @@ index 0000000..7c465af \ No newline at end of file EOF -test_expect_success \ +test_expect_success SYMLINKS \ 'diff new symlink' \ 'ln -s xyzzy frotz && git update-index && @@ -35,7 +29,7 @@ test_expect_success \ GIT_DIFF_OPTS=--unified=0 git diff-index -M -p $tree > current && compare_diff_patch current expected' -test_expect_success \ +test_expect_success SYMLINKS \ 'diff unchanged symlink' \ 'tree=$(git write-tree) && git update-index frotz && @@ -52,7 +46,7 @@ index 7c465af..0000000 \ No newline at end of file EOF -test_expect_success \ +test_expect_success SYMLINKS \ 'diff removed symlink' \ 'mv frotz frotz2 && git diff-index -M -p $tree > current && @@ -62,7 +56,7 @@ cat > expected << EOF diff --git a/frotz b/frotz EOF -test_expect_success \ +test_expect_success SYMLINKS \ 'diff identical, but newly created symlink' \ 'ln -s xyzzy frotz && git diff-index -M -p $tree > current && @@ -80,14 +74,14 @@ index 7c465af..df1db54 120000 \ No newline at end of file EOF -test_expect_success \ +test_expect_success SYMLINKS \ 'diff different symlink' \ 'rm frotz && ln -s yxyyz frotz && git diff-index -M -p $tree > current && compare_diff_patch current expected' -test_expect_success \ +test_expect_success SYMLINKS \ 'diff symlinks with non-existing targets' \ 'ln -s narf pinky && ln -s take\ over brain && diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh index dae6358516..19857f4326 100755 --- a/t/t4013-diff-various.sh +++ b/t/t4013-diff-various.sh @@ -208,6 +208,7 @@ log -p --first-parent master log -m -p --first-parent master log -m -p master log -SF master +log -S F master log -SF -p master log --decorate --all log --decorate=full --all @@ -282,4 +283,8 @@ diff master master^ side diff --dirstat master~1 master~2 EOF +test_expect_success 'log -S requires an argument' ' + test_must_fail git log -S +' + test_done diff --git a/t/t4013/diff.log_-S_F_master 
b/t/t4013/diff.log_-S_F_master new file mode 100644 index 0000000000..978d2b4118 --- /dev/null +++ b/t/t4013/diff.log_-S_F_master @@ -0,0 +1,7 @@ +$ git log -S F master +commit 9a6d4949b6b76956d9d5e26f2791ec2ceff5fdc0 +Author: A U Thor <author@example.com> +Date: Mon Jun 26 00:02:00 2006 +0000 + + Third +$ diff --git a/t/t4016-diff-quote.sh b/t/t4016-diff-quote.sh index 34e5144eed..ab0c2f0574 100755 --- a/t/t4016-diff-quote.sh +++ b/t/t4016-diff-quote.sh @@ -13,12 +13,14 @@ P1='pathname with HT' P2='pathname with SP' P3='pathname with LF' -: 2>/dev/null >"$P1" && test -f "$P1" && rm -f "$P1" || { - skip_all='Your filesystem does not allow tabs in filenames, test skipped.' - test_done -} +if : 2>/dev/null >"$P1" && test -f "$P1" && rm -f "$P1" +then + test_set_prereq TABS_IN_FILENAMES +else + say 'Your filesystem does not allow tabs in filenames' +fi -test_expect_success setup ' +test_expect_success TABS_IN_FILENAMES setup ' echo P0.0 >"$P0.0" && echo P0.1 >"$P0.1" && echo P0.2 >"$P0.2" && @@ -38,6 +40,7 @@ test_expect_success setup ' : ' +test_expect_success TABS_IN_FILENAMES 'setup expected files' ' cat >expect <<\EOF rename pathname.1 => "Rpathname\twith HT.0" (100%) rename pathname.3 => "Rpathname\nwith LF.0" (100%) @@ -47,11 +50,14 @@ cat >expect <<\EOF rename pathname.0 => Rpathname.0 (100%) rename "pathname\twith HT.0" => Rpathname.1 (100%) EOF -test_expect_success 'git diff --summary -M HEAD' ' +' + +test_expect_success TABS_IN_FILENAMES 'git diff --summary -M HEAD' ' git diff --summary -M HEAD >actual && test_cmp expect actual ' +test_expect_success TABS_IN_FILENAMES 'setup expected files' ' cat >expect <<\EOF pathname.1 => "Rpathname\twith HT.0" | 0 pathname.3 => "Rpathname\nwith LF.0" | 0 @@ -62,7 +68,9 @@ cat >expect <<\EOF "pathname\twith HT.0" => Rpathname.1 | 0 7 files changed, 0 insertions(+), 0 deletions(-) EOF -test_expect_success 'git diff --stat -M HEAD' ' +' + +test_expect_success TABS_IN_FILENAMES 'git diff --stat -M HEAD' ' git diff --stat -M 
HEAD >actual && test_cmp expect actual ' diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 5b10e976a3..61de8a2718 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -32,7 +32,7 @@ EOF sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java -builtin_patterns="bibtex cpp html java objc pascal php python ruby tex" +builtin_patterns="bibtex cpp csharp html java objc pascal php python ruby tex" for p in $builtin_patterns do test_expect_success "builtin $p pattern compiles" ' diff --git a/t/t4023-diff-rename-typechange.sh b/t/t4023-diff-rename-typechange.sh index 40a95a149e..5d20acf436 100755 --- a/t/t4023-diff-rename-typechange.sh +++ b/t/t4023-diff-rename-typechange.sh @@ -4,13 +4,7 @@ test_description='typechange rename detection' . ./test-lib.sh -if ! test_have_prereq SYMLINKS -then - skip_all='Symbolic links not supported, skipping tests.' - test_done -fi - -test_expect_success setup ' +test_expect_success SYMLINKS setup ' rm -f foo bar && cat "$TEST_DIRECTORY"/../COPYING >foo && @@ -56,7 +50,7 @@ test_expect_success setup ' ' -test_expect_success 'cross renames to be detected for regular files' ' +test_expect_success SYMLINKS 'cross renames to be detected for regular files' ' git diff-tree five six -r --name-status -B -M | sort >actual && { @@ -67,7 +61,7 @@ test_expect_success 'cross renames to be detected for regular files' ' ' -test_expect_success 'cross renames to be detected for typechange' ' +test_expect_success SYMLINKS 'cross renames to be detected for typechange' ' git diff-tree one two -r --name-status -B -M | sort >actual && { @@ -78,7 +72,7 @@ test_expect_success 'cross renames to be detected for typechange' ' ' -test_expect_success 'moves and renames' ' +test_expect_success SYMLINKS 'moves and renames' ' git diff-tree three four -r --name-status -B -M | sort >actual && { diff --git a/t/t4027-diff-submodule.sh b/t/t4027-diff-submodule.sh index 1bd8e5ee3a..d99814ac64 100755 --- a/t/t4027-diff-submodule.sh +++ 
b/t/t4027-diff-submodule.sh @@ -114,6 +114,69 @@ test_expect_success 'git diff HEAD with dirty submodule (work tree, refs match)' ! test -s actual4 ' +test_expect_success 'git diff HEAD with dirty submodule (work tree, refs match) [.git/config]' ' + git config diff.ignoreSubmodules all && + git diff HEAD >actual && + ! test -s actual && + git config submodule.subname.ignore none && + git config submodule.subname.path sub && + git diff HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config submodule.subname.ignore all && + git diff HEAD >actual2 && + ! test -s actual2 && + git config submodule.subname.ignore untracked && + git diff HEAD >actual3 && + sed -e "1,/^@@/d" actual3 >actual3.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual3.body && + git config submodule.subname.ignore dirty && + git diff HEAD >actual4 && + ! test -s actual4 && + git diff HEAD --ignore-submodules=none >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config --remove-section submodule.subname && + git config --unset diff.ignoreSubmodules +' + +test_expect_success 'git diff HEAD with dirty submodule (work tree, refs match) [.gitmodules]' ' + git config diff.ignoreSubmodules dirty && + git diff HEAD >actual && + ! test -s actual && + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sub && + git diff HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config -f .gitmodules submodule.subname.ignore all && + git config -f .gitmodules submodule.subname.path sub && + git diff HEAD >actual2 && + ! 
test -s actual2 && + git config -f .gitmodules submodule.subname.ignore untracked && + git diff HEAD >actual3 && + sed -e "1,/^@@/d" actual3 >actual3.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual3.body && + git config -f .gitmodules submodule.subname.ignore dirty && + git diff HEAD >actual4 && + ! test -s actual4 && + git config submodule.subname.ignore none && + git config submodule.subname.path sub && + git diff HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config --remove-section submodule.subname && + git config --remove-section -f .gitmodules submodule.subname && + git config --unset diff.ignoreSubmodules && + rm .gitmodules +' + test_expect_success 'git diff HEAD with dirty submodule (index, refs match)' ' ( cd sub && @@ -146,6 +209,103 @@ test_expect_success 'git diff HEAD with dirty submodule (untracked, refs match)' ! test -s actual4 ' +test_expect_success 'git diff HEAD with dirty submodule (untracked, refs match) [.git/config]' ' + git config submodule.subname.ignore all && + git config submodule.subname.path sub && + git diff HEAD >actual2 && + ! test -s actual2 && + git config submodule.subname.ignore untracked && + git diff HEAD >actual3 && + ! test -s actual3 && + git config submodule.subname.ignore dirty && + git diff HEAD >actual4 && + ! test -s actual4 && + git diff --ignore-submodules=none HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config --remove-section submodule.subname +' + +test_expect_success 'git diff HEAD with dirty submodule (untracked, refs match) [.gitmodules]' ' + git config --add -f .gitmodules submodule.subname.ignore all && + git config --add -f .gitmodules submodule.subname.path sub && + git diff HEAD >actual2 && + ! 
test -s actual2 && + git config -f .gitmodules submodule.subname.ignore untracked && + git diff HEAD >actual3 && + ! test -s actual3 && + git config -f .gitmodules submodule.subname.ignore dirty && + git diff HEAD >actual4 && + ! test -s actual4 && + git config submodule.subname.ignore none && + git config submodule.subname.path sub && + git diff HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subprev $subprev-dirty && + test_cmp expect.body actual.body && + git config --remove-section submodule.subname && + git config --remove-section -f .gitmodules submodule.subname && + rm .gitmodules +' + +test_expect_success 'git diff between submodule commits' ' + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git diff --ignore-submodules=dirty HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git diff --ignore-submodules HEAD^..HEAD >actual && + ! test -s actual +' + +test_expect_success 'git diff between submodule commits [.git/config]' ' + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git config submodule.subname.ignore dirty && + git config submodule.subname.path sub && + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git config submodule.subname.ignore all && + git diff HEAD^..HEAD >actual && + ! 
test -s actual && + git diff --ignore-submodules=dirty HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + git config --remove-section submodule.subname +' + +test_expect_success 'git diff between submodule commits [.gitmodules]' ' + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git config --add -f .gitmodules submodule.subname.ignore dirty && + git config --add -f .gitmodules submodule.subname.path sub && + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + test_cmp expect.body actual.body && + git config -f .gitmodules submodule.subname.ignore all && + git diff HEAD^..HEAD >actual && + ! test -s actual && + git config submodule.subname.ignore dirty && + git config submodule.subname.path sub && + git diff HEAD^..HEAD >actual && + sed -e "1,/^@@/d" actual >actual.body && + expect_from_to >expect.body $subtip $subprev && + git config --remove-section submodule.subname && + git config --remove-section -f .gitmodules submodule.subname && + rm .gitmodules +' + test_expect_success 'git diff (empty submodule dir)' ' : >empty && rm -rf sub/* sub/.git && diff --git a/t/t4102-apply-rename.sh b/t/t4102-apply-rename.sh index 1597965241..e3ea3d5114 100755 --- a/t/t4102-apply-rename.sh +++ b/t/t4102-apply-rename.sh @@ -7,6 +7,7 @@ test_description='git apply handling copy/rename patch. ' . ./test-lib.sh +. 
"$TEST_DIRECTORY"/lib-prereq-FILEMODE.sh # setup @@ -31,13 +32,6 @@ test_expect_success setup \ test_expect_success apply \ 'git apply --index --stat --summary --apply test-patch' -if test "$(git config --bool core.filemode)" = false -then - say 'filemode disabled on the filesystem' -else - test_set_prereq FILEMODE -fi - test_expect_success FILEMODE validate \ 'test -f bar && ls -l bar | grep "^-..x......"' diff --git a/t/t4111-apply-subdir.sh b/t/t4111-apply-subdir.sh new file mode 100755 index 0000000000..a52d94ae21 --- /dev/null +++ b/t/t4111-apply-subdir.sh @@ -0,0 +1,142 @@ +#!/bin/sh + +test_description='patching from inconvenient places' + +. ./test-lib.sh + +test_expect_success 'setup' ' + cat >patch <<-\EOF && + diff file.orig file + --- a/file.orig + +++ b/file + @@ -1 +1,2 @@ + 1 + +2 + EOF + patch="$(pwd)/patch" && + + echo 1 >preimage && + printf "%s\n" 1 2 >postimage && + echo 3 >other && + + test_tick && + git commit --allow-empty -m basis +' + +test_expect_success 'setup: subdir' ' + reset_subdir() { + git reset && + mkdir -p sub/dir/b && + mkdir -p objects && + cp "$1" file && + cp "$1" objects/file && + cp "$1" sub/dir/file && + cp "$1" sub/dir/b/file && + git add file sub/dir/file sub/dir/b/file objects/file && + cp "$2" file && + cp "$2" sub/dir/file && + cp "$2" sub/dir/b/file && + cp "$2" objects/file && + test_might_fail git update-index --refresh -q + } +' + +test_expect_success 'apply from subdir of toplevel' ' + cp postimage expected && + reset_subdir other preimage && + ( + cd sub/dir && + git apply "$patch" + ) && + test_cmp expected sub/dir/file +' + +test_expect_success 'apply --cached from subdir of toplevel' ' + cp postimage expected && + cp other expected.working && + reset_subdir preimage other && + ( + cd sub/dir && + git apply --cached "$patch" + ) && + git show :sub/dir/file >actual && + test_cmp expected actual && + test_cmp expected.working sub/dir/file +' + +test_expect_success 'apply --index from subdir of toplevel' ' + cp 
postimage expected && + reset_subdir preimage other && + ( + cd sub/dir && + test_must_fail git apply --index "$patch" + ) && + reset_subdir other preimage && + ( + cd sub/dir && + test_must_fail git apply --index "$patch" + ) && + reset_subdir preimage preimage && + ( + cd sub/dir && + git apply --index "$patch" + ) && + git show :sub/dir/file >actual && + test_cmp expected actual && + test_cmp expected sub/dir/file +' + +test_expect_success 'apply from .git dir' ' + cp postimage expected && + cp preimage .git/file && + cp preimage .git/objects/file + ( + cd .git && + git apply "$patch" + ) && + test_cmp expected .git/file +' + +test_expect_success 'apply from subdir of .git dir' ' + cp postimage expected && + cp preimage .git/file && + cp preimage .git/objects/file + ( + cd .git/objects && + git apply "$patch" + ) && + test_cmp expected .git/objects/file +' + +test_expect_success 'apply --cached from .git dir' ' + cp postimage expected && + cp other expected.working && + cp other .git/file && + reset_subdir preimage other && + ( + cd .git && + git apply --cached "$patch" + ) && + git show :file >actual && + test_cmp expected actual && + test_cmp expected.working file && + test_cmp expected.working .git/file +' + +test_expect_success 'apply --cached from subdir of .git dir' ' + cp postimage expected && + cp preimage expected.subdir && + cp other .git/file && + cp other .git/objects/file && + reset_subdir preimage other && + ( + cd .git/objects && + git apply --cached "$patch" + ) && + git show :file >actual && + git show :objects/file >actual.subdir && + test_cmp expected actual && + test_cmp expected.subdir actual.subdir +' + +test_done diff --git a/t/t4114-apply-typechange.sh b/t/t4114-apply-typechange.sh index 164d58c222..f12826fb09 100755 --- a/t/t4114-apply-typechange.sh +++ b/t/t4114-apply-typechange.sh @@ -9,13 +9,7 @@ test_description='git apply should not get confused with type changes. . ./test-lib.sh -if ! 
test_have_prereq SYMLINKS -then - skip_all='Symbolic links not supported, skipping tests.' - test_done -fi - -test_expect_success 'setup repository and commits' ' +test_expect_success SYMLINKS 'setup repository and commits' ' echo "hello world" > foo && echo "hi planet" > bar && git update-index --add foo bar && @@ -48,7 +42,7 @@ test_expect_success 'setup repository and commits' ' git branch foo-baz-renamed-from-foo ' -test_expect_success 'file renamed from foo to foo/baz' ' +test_expect_success SYMLINKS 'file renamed from foo to foo/baz' ' git checkout -f initial && git diff-tree -M -p HEAD foo-baz-renamed-from-foo > patch && git apply --index < patch @@ -56,7 +50,7 @@ test_expect_success 'file renamed from foo to foo/baz' ' test_debug 'cat patch' -test_expect_success 'file renamed from foo/baz to foo' ' +test_expect_success SYMLINKS 'file renamed from foo/baz to foo' ' git checkout -f foo-baz-renamed-from-foo && git diff-tree -M -p HEAD initial > patch && git apply --index < patch @@ -64,7 +58,7 @@ test_expect_success 'file renamed from foo/baz to foo' ' test_debug 'cat patch' -test_expect_success 'directory becomes file' ' +test_expect_success SYMLINKS 'directory becomes file' ' git checkout -f foo-becomes-a-directory && git diff-tree -p HEAD initial > patch && git apply --index < patch @@ -72,7 +66,7 @@ test_expect_success 'directory becomes file' ' test_debug 'cat patch' -test_expect_success 'file becomes directory' ' +test_expect_success SYMLINKS 'file becomes directory' ' git checkout -f initial && git diff-tree -p HEAD foo-becomes-a-directory > patch && git apply --index < patch @@ -80,7 +74,7 @@ test_expect_success 'file becomes directory' ' test_debug 'cat patch' -test_expect_success 'file becomes symlink' ' +test_expect_success SYMLINKS 'file becomes symlink' ' git checkout -f initial && git diff-tree -p HEAD foo-symlinked-to-bar > patch && git apply --index < patch @@ -88,21 +82,21 @@ test_expect_success 'file becomes symlink' ' test_debug 'cat patch' 
-test_expect_success 'symlink becomes file' ' +test_expect_success SYMLINKS 'symlink becomes file' ' git checkout -f foo-symlinked-to-bar && git diff-tree -p HEAD foo-back-to-file > patch && git apply --index < patch ' test_debug 'cat patch' -test_expect_success 'binary file becomes symlink' ' +test_expect_success SYMLINKS 'binary file becomes symlink' ' git checkout -f foo-becomes-binary && git diff-tree -p --binary HEAD foo-symlinked-to-bar > patch && git apply --index < patch ' test_debug 'cat patch' -test_expect_success 'symlink becomes binary file' ' +test_expect_success SYMLINKS 'symlink becomes binary file' ' git checkout -f foo-symlinked-to-bar && git diff-tree -p --binary HEAD foo-becomes-binary > patch && git apply --index < patch @@ -110,7 +104,7 @@ test_expect_success 'symlink becomes binary file' ' test_debug 'cat patch' -test_expect_success 'symlink becomes directory' ' +test_expect_success SYMLINKS 'symlink becomes directory' ' git checkout -f foo-symlinked-to-bar && git diff-tree -p HEAD foo-becomes-a-directory > patch && git apply --index < patch @@ -118,7 +112,7 @@ test_expect_success 'symlink becomes directory' ' test_debug 'cat patch' -test_expect_success 'directory becomes symlink' ' +test_expect_success SYMLINKS 'directory becomes symlink' ' git checkout -f foo-becomes-a-directory && git diff-tree -p HEAD foo-symlinked-to-bar > patch && git apply --index < patch diff --git a/t/t4115-apply-symlink.sh b/t/t4115-apply-symlink.sh index aff4348034..7674dd2ec9 100755 --- a/t/t4115-apply-symlink.sh +++ b/t/t4115-apply-symlink.sh @@ -9,13 +9,7 @@ test_description='git apply symlinks and partial files . ./test-lib.sh -if ! test_have_prereq SYMLINKS -then - skip_all='Symbolic links not supported, skipping tests.' - test_done -fi - -test_expect_success setup ' +test_expect_success SYMLINKS setup ' ln -s path1/path2/path3/path4/path5 link1 && git add link? 
&& @@ -34,7 +28,7 @@ test_expect_success setup ' ' -test_expect_success 'apply symlink patch' ' +test_expect_success SYMLINKS 'apply symlink patch' ' git checkout side && git apply patch && @@ -43,7 +37,7 @@ test_expect_success 'apply symlink patch' ' ' -test_expect_success 'apply --index symlink patch' ' +test_expect_success SYMLINKS 'apply --index symlink patch' ' git checkout -f side && git apply --index patch && diff --git a/t/t4120-apply-popt.sh b/t/t4120-apply-popt.sh index b463b4f05c..2b2d00b334 100755 --- a/t/t4120-apply-popt.sh +++ b/t/t4120-apply-popt.sh @@ -10,21 +10,50 @@ test_description='git apply -p handling.' test_expect_success setup ' mkdir sub && echo A >sub/file1 && - cp sub/file1 file1 && + cp sub/file1 file1.saved && git add sub/file1 && echo B >sub/file1 && git diff >patch.file && - rm sub/file1 && - rmdir sub + git checkout -- sub/file1 && + git mv sub süb && + echo B >süb/file1 && + git diff >patch.escaped && + grep "[\]" patch.escaped && + rm süb/file1 && + rmdir süb ' test_expect_success 'apply git diff with -p2' ' + cp file1.saved file1 && git apply -p2 patch.file ' test_expect_success 'apply with too large -p' ' + cp file1.saved file1 && test_must_fail git apply --stat -p3 patch.file 2>err && grep "removing 3 leading" err ' +test_expect_success 'apply (-p2) traditional diff with funny filenames' ' + cat >patch.quotes <<-\EOF && + diff -u "a/"sub/file1 "b/"sub/file1 + --- "a/"sub/file1 + +++ "b/"sub/file1 + @@ -1 +1 @@ + -A + +B + EOF + echo B >expected && + + cp file1.saved file1 && + git apply -p2 patch.quotes && + test_cmp expected file1 +' + +test_expect_success 'apply with too large -p and fancy filename' ' + cp file1.saved file1 && + test_must_fail git apply --stat -p3 patch.escaped 2>err && + grep "removing 3 leading" err +' + test_done diff --git a/t/t4122-apply-symlink-inside.sh b/t/t4122-apply-symlink-inside.sh index 923fcab7f9..39407376ba 100755 --- a/t/t4122-apply-symlink-inside.sh +++ b/t/t4122-apply-symlink-inside.sh @@ 
-3,12 +3,6 @@ test_description='apply to deeper directory without getting fooled with symlink' . ./test-lib.sh -if ! test_have_prereq SYMLINKS -then - skip_all='Symbolic links not supported, skipping tests.' - test_done -fi - lecho () { for l_ do @@ -16,7 +10,7 @@ lecho () { done } -test_expect_success setup ' +test_expect_success SYMLINKS setup ' mkdir -p arch/i386/boot arch/x86_64 && lecho 1 2 3 4 5 >arch/i386/boot/Makefile && @@ -37,7 +31,7 @@ test_expect_success setup ' ' -test_expect_success apply ' +test_expect_success SYMLINKS apply ' git checkout test && git diff --exit-code test && @@ -46,7 +40,7 @@ test_expect_success apply ' ' -test_expect_success 'check result' ' +test_expect_success SYMLINKS 'check result' ' git diff --exit-code master && git diff --exit-code --cached master && diff --git a/t/t4129-apply-samemode.sh b/t/t4129-apply-samemode.sh index fc7af04931..0d36ebdc86 100755 --- a/t/t4129-apply-samemode.sh +++ b/t/t4129-apply-samemode.sh @@ -3,13 +3,7 @@ test_description='applying patch with mode bits' . ./test-lib.sh - -if test "$(git config --bool core.filemode)" = false -then - say 'filemode disabled on the filesystem' -else - test_set_prereq FILEMODE -fi +. "$TEST_DIRECTORY"/lib-prereq-FILEMODE.sh test_expect_success setup ' echo original >file && diff --git a/t/t4135-apply-weird-filenames.sh b/t/t4135-apply-weird-filenames.sh new file mode 100755 index 0000000000..1e5aad57ab --- /dev/null +++ b/t/t4135-apply-weird-filenames.sh @@ -0,0 +1,75 @@ +#!/bin/sh + +test_description='git apply with weird postimage filenames' + +. 
./test-lib.sh + +test_expect_success 'setup' ' + vector=$TEST_DIRECTORY/t4135 && + + test_tick && + git commit --allow-empty -m preimage && + git tag preimage && + + reset_preimage() { + git checkout -f preimage^0 && + git read-tree -u --reset HEAD && + git update-index --refresh + } && + + test_when_finished "rm -f \"tab embedded.txt\"" && + test_when_finished "rm -f '\''\"quoteembedded\".txt'\''" && + if touch -- "tab embedded.txt" '\''"quoteembedded".txt'\'' + then + test_set_prereq FUNNYNAMES + fi +' + +try_filename() { + desc=$1 + postimage=$2 + prereq=${3:-} + exp1=${4:-success} + exp2=${5:-success} + exp3=${6:-success} + + test_expect_$exp1 $prereq "$desc, git-style file creation patch" " + echo postimage >expected && + reset_preimage && + rm -f '$postimage' && + git apply -v \"\$vector\"/'git-$desc.diff' && + test_cmp expected '$postimage' + " + + test_expect_$exp2 $prereq "$desc, traditional patch" " + echo postimage >expected && + reset_preimage && + echo preimage >'$postimage' && + git apply -v \"\$vector\"/'diff-$desc.diff' && + test_cmp expected '$postimage' + " + + test_expect_$exp3 $prereq "$desc, traditional file creation patch" " + echo postimage >expected && + reset_preimage && + rm -f '$postimage' && + git apply -v \"\$vector\"/'add-$desc.diff' && + test_cmp expected '$postimage' + " +} + +try_filename 'plain' 'postimage.txt' +try_filename 'with spaces' 'post image.txt' +try_filename 'with tab' 'post image.txt' FUNNYNAMES +try_filename 'with backslash' 'post\image.txt' BSLASHPSPEC +try_filename 'with quote' '"postimage".txt' FUNNYNAMES success failure success + +test_expect_success 'whitespace-damaged traditional patch' ' + echo postimage >expected && + reset_preimage && + rm -f postimage.txt && + git apply -v "$vector/damaged.diff" && + test_cmp expected postimage.txt +' + +test_done diff --git a/t/t4135/.gitignore b/t/t4135/.gitignore new file mode 100644 index 0000000000..3e58e65f57 --- /dev/null +++ b/t/t4135/.gitignore @@ -0,0 +1,3 @@ 
+/file-creation/ +/trad-creation/ +/trad-modification/ diff --git a/t/t4135/add-plain.diff b/t/t4135/add-plain.diff new file mode 100644 index 0000000000..cf5970a089 --- /dev/null +++ b/t/t4135/add-plain.diff @@ -0,0 +1,5 @@ +diff -pruN a/postimage.txt b/postimage.txt +--- a/postimage.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/postimage.txt 2010-08-18 20:13:31.484002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/add-with backslash.diff b/t/t4135/add-with backslash.diff new file mode 100644 index 0000000000..c6861e1966 --- /dev/null +++ b/t/t4135/add-with backslash.diff @@ -0,0 +1,5 @@ +diff -pruN a/post\image.txt b/post\image.txt +--- a/post\image.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/post\image.txt 2010-08-18 20:13:31.692002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/add-with quote.diff b/t/t4135/add-with quote.diff new file mode 100644 index 0000000000..866de78ca1 --- /dev/null +++ b/t/t4135/add-with quote.diff @@ -0,0 +1,5 @@ +diff -pruN a/"postimage".txt b/"postimage".txt +--- a/"postimage".txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/"postimage".txt 2010-08-18 20:13:31.756002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/add-with spaces.diff b/t/t4135/add-with spaces.diff new file mode 100644 index 0000000000..a9a1212a21 --- /dev/null +++ b/t/t4135/add-with spaces.diff @@ -0,0 +1,5 @@ +diff -pruN a/post image.txt b/post image.txt +--- a/post image.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/post image.txt 2010-08-18 20:13:31.556002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/add-with tab.diff b/t/t4135/add-with tab.diff new file mode 100644 index 0000000000..bb67cb7930 --- /dev/null +++ b/t/t4135/add-with tab.diff @@ -0,0 +1,5 @@ +diff -pruN a/post image.txt b/post image.txt +--- a/post image.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/post image.txt 2010-08-18 20:13:31.628002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/damaged.diff b/t/t4135/damaged.diff new file mode 100644 
index 0000000000..68f7ededf9 --- /dev/null +++ b/t/t4135/damaged.diff @@ -0,0 +1,5 @@ +diff -pruN a/postimage.txt b/postimage.txt +--- a/postimage.txt 1969-12-31 18:00:00.000000000 -0600 ++++ b/postimage.txt 2010-08-18 20:13:31.484002255 -0500 +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/diff-plain.diff b/t/t4135/diff-plain.diff new file mode 100644 index 0000000000..acedcfa612 --- /dev/null +++ b/t/t4135/diff-plain.diff @@ -0,0 +1,5 @@ +--- postimage.txt.orig 2010-08-18 20:13:31.432002255 -0500 ++++ postimage.txt 2010-08-18 20:13:31.432002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git a/t/t4135/diff-with backslash.diff b/t/t4135/diff-with backslash.diff new file mode 100644 index 0000000000..9068a61bd9 --- /dev/null +++ b/t/t4135/diff-with backslash.diff @@ -0,0 +1,5 @@ +--- post\image.txt.orig 2010-08-18 20:13:31.680002255 -0500 ++++ post\image.txt 2010-08-18 20:13:31.680002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git a/t/t4135/diff-with quote.diff b/t/t4135/diff-with quote.diff new file mode 100644 index 0000000000..c8e8cc1a8d --- /dev/null +++ b/t/t4135/diff-with quote.diff @@ -0,0 +1,5 @@ +--- "postimage".txt.orig 2010-08-18 20:13:31.744002255 -0500 ++++ "postimage".txt 2010-08-18 20:13:31.744002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git a/t/t4135/diff-with spaces.diff b/t/t4135/diff-with spaces.diff new file mode 100644 index 0000000000..3512056f21 --- /dev/null +++ b/t/t4135/diff-with spaces.diff @@ -0,0 +1,5 @@ +--- post image.txt.orig 2010-08-18 20:13:31.544002255 -0500 ++++ post image.txt 2010-08-18 20:13:31.544002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git a/t/t4135/diff-with tab.diff b/t/t4135/diff-with tab.diff new file mode 100644 index 0000000000..4e6d9b2941 --- /dev/null +++ b/t/t4135/diff-with tab.diff @@ -0,0 +1,5 @@ +--- post image.txt.orig 2010-08-18 20:13:31.616002255 -0500 ++++ post image.txt 2010-08-18 20:13:31.616002255 -0500 +@@ -1 +1 @@ +-preimage ++postimage diff --git 
a/t/t4135/git-plain.diff b/t/t4135/git-plain.diff new file mode 100644 index 0000000000..db47d1a693 --- /dev/null +++ b/t/t4135/git-plain.diff @@ -0,0 +1,7 @@ +diff --git a/postimage.txt b/postimage.txt +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ b/postimage.txt +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/git-with backslash.diff b/t/t4135/git-with backslash.diff new file mode 100644 index 0000000000..0e84a10e93 --- /dev/null +++ b/t/t4135/git-with backslash.diff @@ -0,0 +1,7 @@ +diff --git "a/post\\image.txt" "b/post\\image.txt" +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ "b/post\\image.txt" +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/git-with quote.diff b/t/t4135/git-with quote.diff new file mode 100644 index 0000000000..bdbea8af35 --- /dev/null +++ b/t/t4135/git-with quote.diff @@ -0,0 +1,7 @@ +diff --git "a/\"postimage\".txt" "b/\"postimage\".txt" +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ "b/\"postimage\".txt" +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/git-with spaces.diff b/t/t4135/git-with spaces.diff new file mode 100644 index 0000000000..baaa810de0 --- /dev/null +++ b/t/t4135/git-with spaces.diff @@ -0,0 +1,7 @@ +diff --git a/post image.txt b/post image.txt +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ b/post image.txt +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/git-with tab.diff b/t/t4135/git-with tab.diff new file mode 100644 index 0000000000..cca3c9287b --- /dev/null +++ b/t/t4135/git-with tab.diff @@ -0,0 +1,7 @@ +diff --git "a/post\timage.txt" "b/post\timage.txt" +new file mode 100644 +index 0000000..eff0c54 +--- /dev/null ++++ "b/post\timage.txt" +@@ -0,0 +1 @@ ++postimage diff --git a/t/t4135/make-patches b/t/t4135/make-patches new file mode 100755 index 0000000000..f5f45ddd09 --- /dev/null +++ b/t/t4135/make-patches @@ -0,0 +1,45 @@ +#!/bin/sh + +do_filename() { + desc=$1 + postimage=$2 + + rm -fr file-creation && + git init file-creation && + ( + 
cd file-creation && + git commit --allow-empty -m init && + echo postimage >"$postimage" && + git add -N "$postimage" && + git diff HEAD >"../git-$desc.diff" + ) && + + rm -fr trad-modification && + mkdir trad-modification && + ( + cd trad-modification && + echo preimage >"$postimage.orig" && + echo postimage >"$postimage" && + ! diff -u "$postimage.orig" "$postimage" >"../diff-$desc.diff" + ) && + + rm -fr trad-creation && + mkdir trad-creation && + ( + cd trad-creation && + mkdir a b && + echo postimage >"b/$postimage" && + ! diff -pruN a b >"../add-$desc.diff" + ) +} + +do_filename plain postimage.txt && +do_filename 'with spaces' 'post image.txt' && +do_filename 'with tab' 'post image.txt' && +do_filename 'with backslash' 'post\image.txt' && +do_filename 'with quote' '"postimage".txt' && +expand add-plain.diff >damaged.diff || +{ + echo >&2 Failed. && + exit 1 +} diff --git a/t/t4200-rerere.sh b/t/t4200-rerere.sh index 093b138911..36255d608a 100755 --- a/t/t4200-rerere.sh +++ b/t/t4200-rerere.sh @@ -4,245 +4,391 @@ # test_description='git rerere + +! [fifth] version1 + ! [first] first + ! [fourth] version1 + ! [master] initial + ! [second] prefer first over second + ! [third] version2 +------ + + [third] version2 ++ [fifth] version1 + + [fourth] version1 ++ + + [third^] third + - [second] prefer first over second + + + [first] first + + [second^] second +++++++ [master] initial ' . ./test-lib.sh -test_expect_success 'setup' " - cat > a1 <<- EOF && +test_expect_success 'setup' ' + cat >a1 <<-\EOF && Some title ========== - Whether 'tis nobler in the mind to suffer + Whether '\''tis nobler in the mind to suffer The slings and arrows of outrageous fortune, Or to take arms against a sea of troubles, And by opposing end them? To die: to sleep; No more; and by a sleep to say we end The heart-ache and the thousand natural shocks - That flesh is heir to, 'tis a consummation - Devoutly to be wish'd. 
+ That flesh is heir to, '\''tis a consummation + Devoutly to be wish'\''d. EOF git add a1 && + test_tick && git commit -q -a -m initial && - git checkout -b first && - cat >> a1 <<- EOF && + cat >>a1 <<-\EOF && Some title ========== To die, to sleep; - To sleep: perchance to dream: ay, there's the rub; + To sleep: perchance to dream: ay, there'\''s the rub; For in that sleep of death what dreams may come When we have shuffled off this mortal coil, - Must give us pause: there's the respect + Must give us pause: there'\''s the respect That makes calamity of so long life; EOF + + git checkout -b first && + test_tick && git commit -q -a -m first && git checkout -b second master && git show first:a1 | - sed -e 's/To die, t/To die! T/' -e 's/Some title/Some Title/' > a1 && - echo '* END *' >>a1 && + sed -e "s/To die, t/To die! T/" -e "s/Some title/Some Title/" >a1 && + echo "* END *" >>a1 && + test_tick && git commit -q -a -m second -" +' test_expect_success 'nothing recorded without rerere' ' - (rm -rf .git/rr-cache; git config rerere.enabled false) && + rm -rf .git/rr-cache && + git config rerere.enabled false && test_must_fail git merge first && ! test -d .git/rr-cache ' -# activate rerere, old style -test_expect_success 'conflicting merge' ' +test_expect_success 'activate rerere, old style (conflicting merge)' ' git reset --hard && mkdir .git/rr-cache && - git config --unset rerere.enabled && - test_must_fail git merge first -' + test_might_fail git config --unset rerere.enabled && + test_must_fail git merge first && -sha1=$(perl -pe 's/ .*//' .git/MERGE_RR) -rr=.git/rr-cache/$sha1 -test_expect_success 'recorded preimage' "grep ^=======$ $rr/preimage" + sha1=$(perl -pe "s/ .*//" .git/MERGE_RR) && + rr=.git/rr-cache/$sha1 && + grep "^=======\$" $rr/preimage && + ! test -f $rr/postimage && + ! 
test -f $rr/thisimage +' test_expect_success 'rerere.enabled works, too' ' rm -rf .git/rr-cache && git config rerere.enabled true && git reset --hard && test_must_fail git merge first && + + sha1=$(perl -pe "s/ .*//" .git/MERGE_RR) && + rr=.git/rr-cache/$sha1 && grep ^=======$ $rr/preimage ' -test_expect_success 'no postimage or thisimage yet' \ - "test ! -f $rr/postimage -a ! -f $rr/thisimage" +test_expect_success 'set up rr-cache' ' + rm -rf .git/rr-cache && + git config rerere.enabled true && + git reset --hard && + test_must_fail git merge first && + sha1=$(perl -pe "s/ .*//" .git/MERGE_RR) && + rr=.git/rr-cache/$sha1 +' -test_expect_success 'preimage has right number of lines' ' +test_expect_success 'rr-cache looks sane' ' + # no postimage or thisimage yet + ! test -f $rr/postimage && + ! test -f $rr/thisimage && + # preimage has right number of lines cnt=$(sed -ne "/^<<<<<<</,/^>>>>>>>/p" $rr/preimage | wc -l) && + echo $cnt && test $cnt = 13 - ' -git show first:a1 > a1 - -cat > expect << EOF ---- a/a1 -+++ b/a1 -@@ -1,4 +1,4 @@ --Some Title -+Some title - ========== - Whether 'tis nobler in the mind to suffer - The slings and arrows of outrageous fortune, -@@ -8,21 +8,11 @@ - The heart-ache and the thousand natural shocks - That flesh is heir to, 'tis a consummation - Devoutly to be wish'd. --<<<<<<< --Some Title --========== --To die! 
To sleep; --======= - Some title - ========== - To die, to sleep; -->>>>>>> - To sleep: perchance to dream: ay, there's the rub; - For in that sleep of death what dreams may come - When we have shuffled off this mortal coil, - Must give us pause: there's the respect - That makes calamity of so long life; --<<<<<<< --======= --* END * -->>>>>>> -EOF -git rerere diff > out - -test_expect_success 'rerere diff' 'test_cmp expect out' - -cat > expect << EOF -a1 -EOF - -git rerere status > out - -test_expect_success 'rerere status' 'test_cmp expect out' - -test_expect_success 'commit succeeds' \ - "git commit -q -a -m 'prefer first over second'" - -test_expect_success 'recorded postimage' "test -f $rr/postimage" - -oldmtimepost=$(test-chmtime -v -60 $rr/postimage |cut -f 1) - -test_expect_success 'another conflicting merge' ' - git checkout -b third master && - git show second^:a1 | sed "s/To die: t/To die! T/" > a1 && - git commit -q -a -m third && - test_must_fail git pull . first +test_expect_success 'rerere diff' ' + git show first:a1 >a1 && + cat >expect <<-\EOF && + --- a/a1 + +++ b/a1 + @@ -1,4 +1,4 @@ + -Some Title + +Some title + ========== + Whether '\''tis nobler in the mind to suffer + The slings and arrows of outrageous fortune, + @@ -8,21 +8,11 @@ + The heart-ache and the thousand natural shocks + That flesh is heir to, '\''tis a consummation + Devoutly to be wish'\''d. + -<<<<<<< + -Some Title + -========== + -To die! To sleep; + -======= + Some title + ========== + To die, to sleep; + ->>>>>>> + To sleep: perchance to dream: ay, there'\''s the rub; + For in that sleep of death what dreams may come + When we have shuffled off this mortal coil, + Must give us pause: there'\''s the respect + That makes calamity of so long life; + -<<<<<<< + -======= + -* END * + ->>>>>>> + EOF + git rerere diff >out && + test_cmp expect out ' -git show first:a1 | sed 's/To die: t/To die! T/' > expect -test_expect_success 'rerere kicked in' "! 
grep ^=======$ a1" - -test_expect_success 'rerere prefers first change' 'test_cmp a1 expect' - -test_expect_success 'rerere updates postimage timestamp' ' - newmtimepost=$(test-chmtime -v +0 $rr/postimage |cut -f 1) && - test $oldmtimepost -lt $newmtimepost +test_expect_success 'rerere status' ' + echo a1 >expect && + git rerere status >out && + test_cmp expect out ' -rm $rr/postimage -echo "$sha1 a1" | perl -pe 'y/\012/\000/' > .git/MERGE_RR +test_expect_success 'first postimage wins' ' + git show first:a1 | sed "s/To die: t/To die! T/" >expect && -test_expect_success 'rerere clear' 'git rerere clear' + git commit -q -a -m "prefer first over second" && + test -f $rr/postimage && -test_expect_success 'clear removed the directory' "test ! -d $rr" + oldmtimepost=$(test-chmtime -v -60 $rr/postimage | cut -f 1) && -mkdir $rr -echo Hello > $rr/preimage -echo World > $rr/postimage + git checkout -b third master && + git show second^:a1 | sed "s/To die: t/To die! T/" >a1 && + git commit -q -a -m third && -sha2=4000000000000000000000000000000000000000 -rr2=.git/rr-cache/$sha2 -mkdir $rr2 -echo Hello > $rr2/preimage + test_must_fail git pull . first && + # rerere kicked in + ! grep "^=======\$" a1 && + test_cmp expect a1 +' -almost_15_days_ago=$((60-15*86400)) -just_over_15_days_ago=$((-1-15*86400)) -almost_60_days_ago=$((60-60*86400)) -just_over_60_days_ago=$((-1-60*86400)) +test_expect_success 'rerere updates postimage timestamp' ' + newmtimepost=$(test-chmtime -v +0 $rr/postimage | cut -f 1) && + test $oldmtimepost -lt $newmtimepost +' -test-chmtime =$just_over_60_days_ago $rr/preimage -test-chmtime =$almost_60_days_ago $rr/postimage -test-chmtime =$almost_15_days_ago $rr2/preimage +test_expect_success 'rerere clear' ' + rm $rr/postimage && + echo "$sha1 a1" | perl -pe "y/\012/\000/" >.git/MERGE_RR && + git rerere clear && + ! 
test -d $rr +' -test_expect_success 'garbage collection (part1)' 'git rerere gc' +test_expect_success 'set up for garbage collection tests' ' + mkdir -p $rr && + echo Hello >$rr/preimage && + echo World >$rr/postimage && -test_expect_success 'young or recently used records still live' \ - "test -f $rr/preimage && test -f $rr2/preimage" + sha2=4000000000000000000000000000000000000000 && + rr2=.git/rr-cache/$sha2 && + mkdir $rr2 && + echo Hello >$rr2/preimage && -test-chmtime =$just_over_60_days_ago $rr/postimage -test-chmtime =$just_over_15_days_ago $rr2/preimage + almost_15_days_ago=$((60-15*86400)) && + just_over_15_days_ago=$((-1-15*86400)) && + almost_60_days_ago=$((60-60*86400)) && + just_over_60_days_ago=$((-1-60*86400)) && -test_expect_success 'garbage collection (part2)' 'git rerere gc' + test-chmtime =$just_over_60_days_ago $rr/preimage && + test-chmtime =$almost_60_days_ago $rr/postimage && + test-chmtime =$almost_15_days_ago $rr2/preimage +' -test_expect_success 'old records rest in peace' \ - "test ! -f $rr/preimage && test ! -f $rr2/preimage" +test_expect_success 'gc preserves young or recently used records' ' + git rerere gc && + test -f $rr/preimage && + test -f $rr2/preimage +' -test_expect_success 'file2 added differently in two branches' ' +test_expect_success 'old records rest in peace' ' + test-chmtime =$just_over_60_days_ago $rr/postimage && + test-chmtime =$just_over_15_days_ago $rr2/preimage && + git rerere gc && + ! test -f $rr/preimage && + ! 
test -f $rr2/preimage +' + +test_expect_success 'setup: file2 added differently in two branches' ' git reset --hard && + git checkout -b fourth && - echo Hallo > file2 && + echo Hallo >file2 && git add file2 && + test_tick && git commit -m version1 && + git checkout third && - echo Bello > file2 && + echo Bello >file2 && git add file2 && + test_tick && git commit -m version2 && + test_must_fail git merge fourth && - echo Cello > file2 && + echo Cello >file2 && git add file2 && git commit -m resolution ' test_expect_success 'resolution was recorded properly' ' + echo Cello >expected && + git reset --hard HEAD~2 && git checkout -b fifth && - echo Hallo > file3 && + + echo Hallo >file3 && git add file3 && + test_tick && git commit -m version1 && + git checkout third && - echo Bello > file3 && + echo Bello >file3 && git add file3 && + test_tick && git commit -m version2 && git tag version2 && + test_must_fail git merge fifth && - test Cello = "$(cat file3)" && - test 0 != $(git ls-files -u | wc -l) + test_cmp expected file3 && + test_must_fail git update-index --refresh ' test_expect_success 'rerere.autoupdate' ' - git config rerere.autoupdate true + git config rerere.autoupdate true && git reset --hard && git checkout version2 && test_must_fail git merge fifth && - test 0 = $(git ls-files -u | wc -l) + git update-index --refresh ' test_expect_success 'merge --rerere-autoupdate' ' - git config --unset rerere.autoupdate + test_might_fail git config --unset rerere.autoupdate && git reset --hard && git checkout version2 && test_must_fail git merge --rerere-autoupdate fifth && - test 0 = $(git ls-files -u | wc -l) + git update-index --refresh ' test_expect_success 'merge --no-rerere-autoupdate' ' - git config rerere.autoupdate true + headblob=$(git rev-parse version2:file3) && + mergeblob=$(git rev-parse fifth:file3) && + cat >expected <<-EOF && + 100644 $headblob 2 file3 + 100644 $mergeblob 3 file3 + EOF + + git config rerere.autoupdate true && git reset --hard && git 
checkout version2 && test_must_fail git merge --no-rerere-autoupdate fifth && - test 2 = $(git ls-files -u | wc -l) + git ls-files -u >actual && + test_cmp expected actual +' + +test_expect_success 'set up an unresolved merge' ' + headblob=$(git rev-parse version2:file3) && + mergeblob=$(git rev-parse fifth:file3) && + cat >expected.unresolved <<-EOF && + 100644 $headblob 2 file3 + 100644 $mergeblob 3 file3 + EOF + + test_might_fail git config --unset rerere.autoupdate && + git reset --hard && + git checkout version2 && + fifth=$(git rev-parse fifth) && + echo "$fifth branch 'fifth' of ." | + git fmt-merge-msg >msg && + ancestor=$(git merge-base version2 fifth) && + test_must_fail git merge-recursive "$ancestor" -- HEAD fifth && + + git ls-files --stage >failedmerge && + cp file3 file3.conflict && + + git ls-files -u >actual && + test_cmp expected.unresolved actual +' + +test_expect_success 'explicit rerere' ' + test_might_fail git config --unset rerere.autoupdate && + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + test_must_fail git update-index --refresh -q && + + git rerere && + git ls-files -u >actual && + test_cmp expected.unresolved actual +' + +test_expect_success 'explicit rerere with autoupdate' ' + git config rerere.autoupdate true && + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + test_must_fail git update-index --refresh -q && + + git rerere && + git update-index --refresh +' + +test_expect_success 'explicit rerere --rerere-autoupdate overrides' ' + git config rerere.autoupdate false && + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + git rerere && + git ls-files -u >actual1 && + + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + git rerere --rerere-autoupdate && + git update-index --refresh && + + git rm -fr --cached . 
&& + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + git rerere --rerere-autoupdate --no-rerere-autoupdate && + git ls-files -u >actual2 && + + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + git rerere --rerere-autoupdate --no-rerere-autoupdate --rerere-autoupdate && + git update-index --refresh && + + test_cmp expected.unresolved actual1 && + test_cmp expected.unresolved actual2 +' + +test_expect_success 'rerere --no-no-rerere-autoupdate' ' + git rm -fr --cached . && + git update-index --index-info <failedmerge && + cp file3.conflict file3 && + test_must_fail git rerere --no-no-rerere-autoupdate 2>err && + grep [Uu]sage err && + test_must_fail git update-index --refresh +' + +test_expect_success 'rerere -h' ' + test_must_fail git rerere -h >help && + grep [Uu]sage help ' test_done diff --git a/t/t4202-log.sh b/t/t4202-log.sh index ead204e5cb..2e51356947 100755 --- a/t/t4202-log.sh +++ b/t/t4202-log.sh @@ -100,13 +100,11 @@ test_expect_success 'oneline' ' test_expect_success 'diff-filter=A' ' - actual=$(git log --pretty="format:%s" --diff-filter=A HEAD) && - expect=$(echo fifth ; echo fourth ; echo third ; echo initial) && - test "$actual" = "$expect" || { - echo Oops - echo "Actual: $actual" - false - } + git log --pretty="format:%s" --diff-filter=A HEAD > actual && + git log --pretty="format:%s" --diff-filter A HEAD > actual-separate && + printf "fifth\nfourth\nthird\ninitial" > expect && + test_cmp expect actual && + test_cmp expect actual-separate ' @@ -203,6 +201,13 @@ test_expect_success 'log --grep' ' test_cmp expect actual ' +test_expect_success 'log --grep option parsing' ' + echo second >expect && + git log -1 --pretty="tformat:%s" --grep sec >actual && + test_cmp expect actual && + test_must_fail git log -1 --pretty="tformat:%s" --grep +' + test_expect_success 'log -i --grep' ' echo Second >expect && git log -1 --pretty="tformat:%s" -i --grep=sec >actual && diff --git 
a/t/t5503-tagfollow.sh b/t/t5503-tagfollow.sh index bab1a536f5..8a298a655f 100755 --- a/t/t5503-tagfollow.sh +++ b/t/t5503-tagfollow.sh @@ -6,8 +6,11 @@ test_description='test automatic tag following' case $(uname -s) in *MINGW*) - skip_all="GIT_DEBUG_SEND_PACK not supported - skipping tests" - test_done + say "GIT_DEBUG_SEND_PACK not supported - skipping tests" + ;; +*) + test_set_prereq NOT_MINGW + ;; esac # End state of the repository: @@ -19,7 +22,7 @@ esac # \ C - origin/cat \ # origin/master master -test_expect_success setup ' +test_expect_success NOT_MINGW setup ' test_tick && echo ichi >file && git add file && @@ -42,12 +45,15 @@ test_expect_success setup ' U=UPLOAD_LOG +test_expect_success NOT_MINGW 'setup expect' ' cat - <<EOF >expect #S want $A #E EOF -test_expect_success 'fetch A (new commit : 1 connection)' ' +' + +test_expect_success NOT_MINGW 'fetch A (new commit : 1 connection)' ' rm -f $U ( cd cloned && @@ -59,7 +65,7 @@ test_expect_success 'fetch A (new commit : 1 connection)' ' test_cmp expect actual ' -test_expect_success "create tag T on A, create C on branch cat" ' +test_expect_success NOT_MINGW "create tag T on A, create C on branch cat" ' git tag -a -m tag1 tag1 $A && T=$(git rev-parse --verify tag1) && @@ -71,13 +77,16 @@ test_expect_success "create tag T on A, create C on branch cat" ' git checkout master ' +test_expect_success NOT_MINGW 'setup expect' ' cat - <<EOF >expect #S want $C want $T #E EOF -test_expect_success 'fetch C, T (new branch, tag : 1 connection)' ' +' + +test_expect_success NOT_MINGW 'fetch C, T (new branch, tag : 1 connection)' ' rm -f $U ( cd cloned && @@ -91,7 +100,7 @@ test_expect_success 'fetch C, T (new branch, tag : 1 connection)' ' test_cmp expect actual ' -test_expect_success "create commits O, B, tag S on B" ' +test_expect_success NOT_MINGW "create commits O, B, tag S on B" ' test_tick && echo O >file && git add file && @@ -107,13 +116,16 @@ test_expect_success "create commits O, B, tag S on B" ' S=$(git 
rev-parse --verify tag2) ' +test_expect_success NOT_MINGW 'setup expect' ' cat - <<EOF >expect #S want $B want $S #E EOF -test_expect_success 'fetch B, S (commit and tag : 1 connection)' ' +' + +test_expect_success NOT_MINGW 'fetch B, S (commit and tag : 1 connection)' ' rm -f $U ( cd cloned && @@ -127,13 +139,16 @@ test_expect_success 'fetch B, S (commit and tag : 1 connection)' ' test_cmp expect actual ' +test_expect_success NOT_MINGW 'setup expect' ' cat - <<EOF >expect #S want $B want $S #E EOF -test_expect_success 'new clone fetch master and tags' ' +' + +test_expect_success NOT_MINGW 'new clone fetch master and tags' ' git branch -D cat rm -f $U ( diff --git a/t/t5522-pull-symlink.sh b/t/t5522-pull-symlink.sh index 298200fa4c..8e9b204e02 100755 --- a/t/t5522-pull-symlink.sh +++ b/t/t5522-pull-symlink.sh @@ -4,12 +4,6 @@ test_description='pulling from symlinked subdir' . ./test-lib.sh -if ! test_have_prereq SYMLINKS -then - skip_all='Symbolic links not supported, skipping tests.' - test_done -fi - # The scenario we are building: # # trash\ directory/ @@ -20,7 +14,7 @@ fi # # The working directory is subdir-link. -test_expect_success setup ' +test_expect_success SYMLINKS setup ' mkdir subdir && echo file >subdir/file && git add subdir/file && @@ -36,7 +30,7 @@ test_expect_success setup ' # Demonstrate that things work if we just avoid the symlink # -test_expect_success 'pulling from real subdir' ' +test_expect_success SYMLINKS 'pulling from real subdir' ' ( echo real >subdir/file && git commit -m real subdir/file && @@ -64,7 +58,7 @@ test_expect_success 'pulling from real subdir' ' # directory. A POSIX shell's "cd" works a little differently # than chdir() in C; "cd -P" is much closer to chdir(). 
# -test_expect_success 'pulling from symlinked subdir' ' +test_expect_success SYMLINKS 'pulling from symlinked subdir' ' ( echo link >subdir/file && git commit -m link subdir/file && @@ -77,7 +71,7 @@ test_expect_success 'pulling from symlinked subdir' ' # Prove that the remote end really is a repo, and other commands # work fine in this context. It's just that "git pull" breaks. # -test_expect_success 'pushing from symlinked subdir' ' +test_expect_success SYMLINKS 'pushing from symlinked subdir' ' ( cd subdir-link/ && echo push >file && diff --git a/t/t5530-upload-pack-error.sh b/t/t5530-upload-pack-error.sh index 044603c26e..6b2a5f4a65 100755 --- a/t/t5530-upload-pack-error.sh +++ b/t/t5530-upload-pack-error.sh @@ -60,6 +60,15 @@ test_expect_success 'upload-pack fails due to error in rev-list' ' grep "bad tree object" output.err ' +test_expect_success 'upload-pack error message when bad ref requested' ' + + printf "0045want %s multi_ack_detailed\n00000009done\n0000" \ + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef" >input && + test_must_fail git upload-pack . <input >output 2>output.err && + grep -q "not our ref" output.err && + ! 
grep -q multi_ack_detailed output.err +' + test_expect_success 'upload-pack fails due to error in pack-objects enumeration' ' printf "0032want %s\n00000009done\n0000" \ diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 8abb71afcd..987e0c8463 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -163,8 +163,6 @@ test_expect_success 'clone a void' ' test_expect_success 'clone respects global branch.autosetuprebase' ' ( - HOME=$(pwd) && - export HOME && test_config="$HOME/.gitconfig" && unset GIT_CONFIG_NOGLOBAL && git config -f "$test_config" branch.autosetuprebase remote && @@ -178,8 +176,14 @@ test_expect_success 'clone respects global branch.autosetuprebase' ' test_expect_success 'respect url-encoding of file://' ' git init x+y && - test_must_fail git clone "file://$PWD/x+y" xy-url && - git clone "file://$PWD/x%2By" xy-url + git clone "file://$PWD/x+y" xy-url-1 && + git clone "file://$PWD/x%2By" xy-url-2 +' + +test_expect_success 'do not query-string-decode + in URLs' ' + rm -rf x+y && + git init "x y" && + test_must_fail git clone "file://$PWD/x+y" xy-no-plus ' test_expect_success 'do not respect url-encoding of non-url path' ' diff --git a/t/t5705-clone-2gb.sh b/t/t5705-clone-2gb.sh index e4d1b6a0fa..e9783c341a 100755 --- a/t/t5705-clone-2gb.sh +++ b/t/t5705-clone-2gb.sh @@ -3,12 +3,14 @@ test_description='Test cloning a repository larger than 2 gigabyte' . 
./test-lib.sh -test -z "$GIT_TEST_CLONE_2GB" && -skip_all="Skipping expensive 2GB clone test; enable it with GIT_TEST_CLONE_2GB=t" && -test_done && -exit +if test -z "$GIT_TEST_CLONE_2GB" +then + say 'Skipping expensive 2GB clone test; enable it with GIT_TEST_CLONE_2GB=t' +else + test_set_prereq CLONE_2GB +fi -test_expect_success 'setup' ' +test_expect_success CLONE_2GB 'setup' ' git config pack.compression 0 && git config pack.depth 0 && @@ -36,13 +38,13 @@ test_expect_success 'setup' ' ' -test_expect_success 'clone - bare' ' +test_expect_success CLONE_2GB 'clone - bare' ' git clone --bare --no-hardlinks . clone-bare ' -test_expect_success 'clone - with worktree, file:// protocol' ' +test_expect_success CLONE_2GB 'clone - with worktree, file:// protocol' ' git clone file://. clone-wt diff --git a/t/t5800-remote-helpers.sh b/t/t5800-remote-helpers.sh index 637d8e97ac..1fb6380fce 100755 --- a/t/t5800-remote-helpers.sh +++ b/t/t5800-remote-helpers.sh @@ -13,13 +13,11 @@ if sys.hexversion < 0x02040000: sys.exit(1) ' then - : -else - skip_all='skipping git remote-testgit tests: requires Python 2.4 or newer' - test_done + # Requires Python 2.4 or newer + test_set_prereq PYTHON_24 fi -test_expect_success 'setup repository' ' +test_expect_success PYTHON_24 'setup repository' ' git init --bare server/.git && git clone server public && (cd public && @@ -29,34 +27,34 @@ test_expect_success 'setup repository' ' git push origin master) ' -test_expect_success 'cloning from local repo' ' +test_expect_success PYTHON_24 'cloning from local repo' ' git clone "testgit::${PWD}/server" localclone && test_cmp public/file localclone/file ' -test_expect_success 'cloning from remote repo' ' +test_expect_success PYTHON_24 'cloning from remote repo' ' git clone "testgit::file://${PWD}/server" clone && test_cmp public/file clone/file ' -test_expect_success 'create new commit on remote' ' +test_expect_success PYTHON_24 'create new commit on remote' ' (cd public && echo content >>file && git 
commit -a -m two && git push) ' -test_expect_success 'pulling from local repo' ' +test_expect_success PYTHON_24 'pulling from local repo' ' (cd localclone && git pull) && test_cmp public/file localclone/file ' -test_expect_success 'pulling from remote remote' ' +test_expect_success PYTHON_24 'pulling from remote remote' ' (cd clone && git pull) && test_cmp public/file clone/file ' -test_expect_success 'pushing to local repo' ' +test_expect_success PYTHON_24 'pushing to local repo' ' (cd localclone && echo content >>file && git commit -a -m three && @@ -65,12 +63,12 @@ test_expect_success 'pushing to local repo' ' test $HEAD = $(git --git-dir=server/.git rev-parse --verify HEAD) ' -test_expect_success 'synch with changes from localclone' ' +test_expect_success PYTHON_24 'synch with changes from localclone' ' (cd clone && git pull) ' -test_expect_success 'pushing remote local repo' ' +test_expect_success PYTHON_24 'pushing remote local repo' ' (cd clone && echo content >>file && git commit -a -m four && diff --git a/t/t6010-merge-base.sh b/t/t6010-merge-base.sh index 0144d9e858..62197a3d35 100755 --- a/t/t6010-merge-base.sh +++ b/t/t6010-merge-base.sh @@ -3,175 +3,231 @@ # Copyright (c) 2005 Junio C Hamano # -test_description='Merge base computation. +test_description='Merge base and parent list computation. ' . 
./test-lib.sh -T=$(git write-tree) - -M=1130000000 -Z=+0000 - -GIT_COMMITTER_EMAIL=git@comm.iter.xz -GIT_COMMITTER_NAME='C O Mmiter' -GIT_AUTHOR_NAME='A U Thor' -GIT_AUTHOR_EMAIL=git@au.thor.xz -export GIT_COMMITTER_EMAIL GIT_COMMITTER_NAME GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL - -doit() { - OFFSET=$1; shift - NAME=$1; shift - PARENTS= - for P - do - PARENTS="${PARENTS}-p $P " - done - GIT_COMMITTER_DATE="$(($M + $OFFSET)) $Z" - GIT_AUTHOR_DATE=$GIT_COMMITTER_DATE - export GIT_COMMITTER_DATE GIT_AUTHOR_DATE - commit=$(echo $NAME | git commit-tree $T $PARENTS) - echo $commit >.git/refs/tags/$NAME - echo $commit -} - -# E---D---C---B---A -# \'-_ \ \ -# \ `---------G \ -# \ \ -# F----------------H - -# Setup... -E=$(doit 5 E) -D=$(doit 4 D $E) -F=$(doit 6 F $E) -C=$(doit 3 C $D) -B=$(doit 2 B $C) -A=$(doit 1 A $B) -G=$(doit 7 G $B $E) -H=$(doit 8 H $A $F) - -test_expect_success 'compute merge-base (single)' \ - 'MB=$(git merge-base G H) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/B"' - -test_expect_success 'compute merge-base (all)' \ - 'MB=$(git merge-base --all G H) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/B"' - -test_expect_success 'compute merge-base with show-branch' \ - 'MB=$(git show-branch --merge-base G H) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/B"' - -# Setup for second test to demonstrate that relying on timestamps in a -# distributed SCM to provide a _consistent_ partial ordering of commits -# leads to insanity. -# -# Relative -# Structure timestamps -# -# PL PR +4 +4 -# / \/ \ / \/ \ -# L2 C2 R2 +3 -1 +3 -# | | | | | | -# L1 C1 R1 +2 -2 +2 -# | | | | | | -# L0 C0 R0 +1 -3 +1 -# \ | / \ | / -# S 0 -# -# The left and right chains of commits can be of any length and complexity as -# long as all of the timestamps are greater than that of S. 
+test_expect_success 'setup' ' + T=$(git write-tree) && -S=$(doit 0 S) + M=1130000000 && + Z=+0000 && -C0=$(doit -3 C0 $S) -C1=$(doit -2 C1 $C0) -C2=$(doit -1 C2 $C1) + GIT_COMMITTER_EMAIL=git@comm.iter.xz && + GIT_COMMITTER_NAME="C O Mmiter" && + GIT_AUTHOR_NAME="A U Thor" && + GIT_AUTHOR_EMAIL=git@au.thor.xz && + export GIT_COMMITTER_EMAIL GIT_COMMITTER_NAME GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL && -L0=$(doit 1 L0 $S) -L1=$(doit 2 L1 $L0) -L2=$(doit 3 L2 $L1) + doit() { + OFFSET=$1 && + NAME=$2 && + shift 2 && -R0=$(doit 1 R0 $S) -R1=$(doit 2 R1 $R0) -R2=$(doit 3 R2 $R1) + PARENTS= && + for P + do + PARENTS="${PARENTS}-p $P " + done && -PL=$(doit 4 PL $L2 $C2) -PR=$(doit 4 PR $C2 $R2) + GIT_COMMITTER_DATE="$(($M + $OFFSET)) $Z" && + GIT_AUTHOR_DATE=$GIT_COMMITTER_DATE && + export GIT_COMMITTER_DATE GIT_AUTHOR_DATE && -test_expect_success 'compute merge-base (single)' \ - 'MB=$(git merge-base PL PR) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/C2"' + commit=$(echo $NAME | git commit-tree $T $PARENTS) && -test_expect_success 'compute merge-base (all)' \ - 'MB=$(git merge-base --all PL PR) && - expr "$(git name-rev "$MB")" : "[0-9a-f]* tags/C2"' + echo $commit >.git/refs/tags/$NAME && + echo $commit + } +' -# Another set to demonstrate base between one commit and a merge -# in the documentation. 
-# -# * C (MMC) * B (MMB) * A (MMA) -# * o * o * o -# * o * o * o -# * o * o * o -# * o | _______/ -# | |/ -# | * 1 (MM1) -# | _______/ -# |/ -# * root (MMR) +test_expect_success 'set up G and H' ' + # E---D---C---B---A + # \"-_ \ \ + # \ `---------G \ + # \ \ + # F----------------H + E=$(doit 5 E) && + D=$(doit 4 D $E) && + F=$(doit 6 F $E) && + C=$(doit 3 C $D) && + B=$(doit 2 B $C) && + A=$(doit 1 A $B) && + G=$(doit 7 G $B $E) && + H=$(doit 8 H $A $F) +' + +test_expect_success 'merge-base G H' ' + git name-rev $B >expected && + + MB=$(git merge-base G H) && + git name-rev "$MB" >actual.single && + + MB=$(git merge-base --all G H) && + git name-rev "$MB" >actual.all && + + MB=$(git show-branch --merge-base G H) && + git name-rev "$MB" >actual.sb && + + test_cmp expected actual.single && + test_cmp expected actual.all && + test_cmp expected actual.sb +' +test_expect_success 'merge-base/show-branch --independent' ' + git name-rev "$H" >expected1 && + git name-rev "$H" "$G" >expected2 && + + parents=$(git merge-base --independent H) && + git name-rev $parents >actual1.mb && + parents=$(git merge-base --independent A H G) && + git name-rev $parents >actual2.mb && + + parents=$(git show-branch --independent H) && + git name-rev $parents >actual1.sb && + parents=$(git show-branch --independent A H G) && + git name-rev $parents >actual2.sb && + + test_cmp expected1 actual1.mb && + test_cmp expected2 actual2.mb && + test_cmp expected1 actual1.sb && + test_cmp expected2 actual2.sb +' + +test_expect_success 'unsynchronized clocks' ' + # This test is to demonstrate that relying on timestamps in a distributed + # SCM to provide a _consistent_ partial ordering of commits leads to + # insanity. 
+ # + # Relative + # Structure timestamps + # + # PL PR +4 +4 + # / \/ \ / \/ \ + # L2 C2 R2 +3 -1 +3 + # | | | | | | + # L1 C1 R1 +2 -2 +2 + # | | | | | | + # L0 C0 R0 +1 -3 +1 + # \ | / \ | / + # S 0 + # + # The left and right chains of commits can be of any length and complexity as + # long as all of the timestamps are greater than that of S. + + S=$(doit 0 S) && + + C0=$(doit -3 C0 $S) && + C1=$(doit -2 C1 $C0) && + C2=$(doit -1 C2 $C1) && + + L0=$(doit 1 L0 $S) && + L1=$(doit 2 L1 $L0) && + L2=$(doit 3 L2 $L1) && + + R0=$(doit 1 R0 $S) && + R1=$(doit 2 R1 $R0) && + R2=$(doit 3 R2 $R1) && + + PL=$(doit 4 PL $L2 $C2) && + PR=$(doit 4 PR $C2 $R2) + + git name-rev $C2 >expected && + + MB=$(git merge-base PL PR) && + git name-rev "$MB" >actual.single && + + MB=$(git merge-base --all PL PR) && + git name-rev "$MB" >actual.all && + + test_cmp expected actual.single && + test_cmp expected actual.all +' + +test_expect_success '--independent with unsynchronized clocks' ' + IB=$(doit 0 IB) && + I1=$(doit -10 I1 $IB) && + I2=$(doit -9 I2 $I1) && + I3=$(doit -8 I3 $I2) && + I4=$(doit -7 I4 $I3) && + I5=$(doit -6 I5 $I4) && + I6=$(doit -5 I6 $I5) && + I7=$(doit -4 I7 $I6) && + I8=$(doit -3 I8 $I7) && + IH=$(doit -2 IH $I8) && + + echo $IH >expected && + git merge-base --independent IB IH >actual && + test_cmp expected actual +' test_expect_success 'merge-base for octopus-step (setup)' ' - test_tick && git commit --allow-empty -m root && git tag MMR && - test_tick && git commit --allow-empty -m 1 && git tag MM1 && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m A && git tag MMA && + # Another set to demonstrate base between one commit and a merge + # in the documentation. 
+ # + # * C (MMC) * B (MMB) * A (MMA) + # * o * o * o + # * o * o * o + # * o * o * o + # * o | _______/ + # | |/ + # | * 1 (MM1) + # | _______/ + # |/ + # * root (MMR) + + test_commit MMR && + test_commit MM1 && + test_commit MM-o && + test_commit MM-p && + test_commit MM-q && + test_commit MMA && git checkout MM1 && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m B && git tag MMB && + test_commit MM-r && + test_commit MM-s && + test_commit MM-t && + test_commit MMB && git checkout MMR && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m C && git tag MMC + test_commit MM-u && + test_commit MM-v && + test_commit MM-w && + test_commit MM-x && + test_commit MMC ' test_expect_success 'merge-base A B C' ' - MB=$(git merge-base --all MMA MMB MMC) && - MM1=$(git rev-parse --verify MM1) && - test "$MM1" = "$MB" -' + git rev-parse --verify MM1 >expected && + git rev-parse --verify MMR >expected.sb && -test_expect_success 'merge-base A B C using show-branch' ' - MB=$(git show-branch --merge-base MMA MMB MMC) && - MMR=$(git rev-parse --verify MMR) && - test "$MMR" = "$MB" + git merge-base --all MMA MMB MMC >actual && + git merge-base --all --octopus MMA MMB MMC >actual.common && + git show-branch --merge-base MMA MMB MMC >actual.sb && + + test_cmp expected actual && + test_cmp expected.sb actual.common && + test_cmp expected.sb actual.sb ' -test_expect_success 'criss-cross merge-base for octopus-step (setup)' ' +test_expect_success 'criss-cross merge-base for octopus-step' ' git reset --hard MMR && - test_tick && git commit --allow-empty -m 1 && git tag CC1 && + test_commit CC1 && git reset --hard E && - test_tick && git commit --allow-empty -m 2 && git tag 
CC2 && - test_tick && git merge -s ours CC1 && - test_tick && git commit --allow-empty -m o && - test_tick && git commit --allow-empty -m B && git tag CCB && + test_commit CC2 && + test_tick && + git merge -s ours CC1 && + test_commit CC-o && + test_commit CCB && git reset --hard CC1 && - test_tick && git merge -s ours CC2 && - test_tick && git commit --allow-empty -m A && git tag CCA -' + git merge -s ours CC2 && + test_commit CCA && + + git rev-parse CC1 CC2 >expected && + git merge-base --all CCB CCA^^ CCA^^2 >actual && -test_expect_success 'merge-base B A^^ A^^2' ' - MB0=$(git merge-base --all CCB CCA^^ CCA^^2 | sort) && - MB1=$(git rev-parse CC1 CC2 | sort) && - test "$MB0" = "$MB1" + sort expected >expected.sorted && + sort actual >actual.sorted && + test_cmp expected.sorted actual.sorted ' test_done diff --git a/t/t6018-rev-list-glob.sh b/t/t6018-rev-list-glob.sh index 58428d9f5c..fb8291c812 100755 --- a/t/t6018-rev-list-glob.sh +++ b/t/t6018-rev-list-glob.sh @@ -123,6 +123,12 @@ test_expect_success 'rev-list --glob=refs/heads/subspace/*' ' ' +test_expect_success 'rev-list --glob refs/heads/subspace/*' ' + + compare rev-list "subspace/one subspace/two" "--glob refs/heads/subspace/*" + +' + test_expect_success 'rev-list --glob=heads/subspace/*' ' compare rev-list "subspace/one subspace/two" "--glob=heads/subspace/*" diff --git a/t/t6020-merge-df.sh b/t/t6020-merge-df.sh index e71c687f2b..490d397114 100755 --- a/t/t6020-merge-df.sh +++ b/t/t6020-merge-df.sh @@ -22,7 +22,7 @@ git commit -m "File: dir"' test_expect_code 1 'Merge with d/f conflicts' 'git merge "merge msg" B master' -test_expect_failure 'F/D conflict' ' +test_expect_success 'F/D conflict' ' git reset --hard && git checkout master && rm .git/index && diff --git a/t/t6031-merge-recursive.sh b/t/t6031-merge-recursive.sh index 8a3304fb0b..1cd649e245 100755 --- a/t/t6031-merge-recursive.sh +++ b/t/t6031-merge-recursive.sh @@ -2,11 +2,7 @@ test_description='merge-recursive: handle file mode' . 
./test-lib.sh - -if ! test "$(git config --bool core.filemode)" = false -then - test_set_prereq FILEMODE -fi +. "$TEST_DIRECTORY"/lib-prereq-FILEMODE.sh test_expect_success 'mode change in one branch: keep changed version' ' : >file1 && @@ -57,4 +53,35 @@ test_expect_success FILEMODE 'verify executable bit on file' ' test -x file2 ' +test_expect_success 'merging with triple rename across D/F conflict' ' + git reset --hard HEAD && + git checkout -b main && + git rm -rf . && + + echo "just a file" >sub1 && + mkdir -p sub2 && + echo content1 >sub2/file1 && + echo content2 >sub2/file2 && + echo content3 >sub2/file3 && + mkdir simple && + echo base >simple/bar && + git add -A && + test_tick && + git commit -m base && + + git checkout -b other && + echo more >>simple/bar && + test_tick && + git commit -a -m changesimplefile && + + git checkout main && + git rm sub1 && + git mv sub2 sub1 && + test_tick && + git commit -m changefiletodir && + + test_tick && + git merge other +' + test_done diff --git a/t/t6035-merge-dir-to-symlink.sh b/t/t6035-merge-dir-to-symlink.sh index cd3190c4a6..92e02d5d77 100755 --- a/t/t6035-merge-dir-to-symlink.sh +++ b/t/t6035-merge-dir-to-symlink.sh @@ -3,13 +3,7 @@ test_description='merging when a directory was replaced with a symlink' . ./test-lib.sh -if ! test_have_prereq SYMLINKS -then - skip_all='Symbolic links not supported, skipping tests.' 
- test_done -fi - -test_expect_success 'create a commit where dir a/b changed to symlink' ' +test_expect_success SYMLINKS 'create a commit where dir a/b changed to symlink' ' mkdir -p a/b/c a/b-2/c && > a/b/c/d && > a/b-2/c/d && @@ -23,7 +17,7 @@ test_expect_success 'create a commit where dir a/b changed to symlink' ' git commit -m "dir to symlink" ' -test_expect_success 'keep a/b-2/c/d across checkout' ' +test_expect_success SYMLINKS 'keep a/b-2/c/d across checkout' ' git checkout HEAD^0 && git reset --hard master && git rm --cached a/b && @@ -32,14 +26,14 @@ test_expect_success 'keep a/b-2/c/d across checkout' ' test -f a/b-2/c/d ' -test_expect_success 'checkout should not have deleted a/b-2/c/d' ' +test_expect_success SYMLINKS 'checkout should not have deleted a/b-2/c/d' ' git checkout HEAD^0 && git reset --hard master && git checkout start^0 && test -f a/b-2/c/d ' -test_expect_success 'setup for merge test' ' +test_expect_success SYMLINKS 'setup for merge test' ' git reset --hard && test -f a/b-2/c/d && echo x > a/x && @@ -48,7 +42,7 @@ test_expect_success 'setup for merge test' ' git tag baseline ' -test_expect_success 'do not lose a/b-2/c/d in merge (resolve)' ' +test_expect_success SYMLINKS 'Handle D/F conflict, do not lose a/b-2/c/d in merge (resolve)' ' git reset --hard && git checkout baseline^0 && git merge -s resolve master && @@ -56,7 +50,7 @@ test_expect_success 'do not lose a/b-2/c/d in merge (resolve)' ' test -f a/b-2/c/d ' -test_expect_failure 'do not lose a/b-2/c/d in merge (recursive)' ' +test_expect_success SYMLINKS 'Handle D/F conflict, do not lose a/b-2/c/d in merge (recursive)' ' git reset --hard && git checkout baseline^0 && git merge -s recursive master && @@ -64,7 +58,55 @@ test_expect_failure 'do not lose a/b-2/c/d in merge (recursive)' ' test -f a/b-2/c/d ' -test_expect_success 'setup a merge where dir a/b-2 changed to symlink' ' +test_expect_success SYMLINKS 'Handle F/D conflict, do not lose a/b-2/c/d in merge (resolve)' ' + git reset 
--hard && + git checkout master^0 && + git merge -s resolve baseline^0 && + test -h a/b && + test -f a/b-2/c/d +' + +test_expect_success SYMLINKS 'Handle F/D conflict, do not lose a/b-2/c/d in merge (recursive)' ' + git reset --hard && + git checkout master^0 && + git merge -s recursive baseline^0 && + test -h a/b && + test -f a/b-2/c/d +' + +test_expect_failure SYMLINKS 'do not lose untracked in merge (resolve)' ' + git reset --hard && + git checkout baseline^0 && + >a/b/c/e && + test_must_fail git merge -s resolve master && + test -f a/b/c/e && + test -f a/b-2/c/d +' + +test_expect_success SYMLINKS 'do not lose untracked in merge (recursive)' ' + git reset --hard && + git checkout baseline^0 && + >a/b/c/e && + test_must_fail git merge -s recursive master && + test -f a/b/c/e && + test -f a/b-2/c/d +' + +test_expect_success SYMLINKS 'do not lose modifications in merge (resolve)' ' + git reset --hard && + git checkout baseline^0 && + echo more content >>a/b/c/d && + test_must_fail git merge -s resolve master +' + +test_expect_success SYMLINKS 'do not lose modifications in merge (recursive)' ' + git reset --hard && + git checkout baseline^0 && + echo more content >>a/b/c/d && + test_must_fail git merge -s recursive master +' + +test_expect_success SYMLINKS 'setup a merge where dir a/b-2 changed to symlink' ' git reset --hard && git checkout start^0 && rm -rf a/b-2 && @@ -74,7 +116,7 @@ test_expect_success 'setup a merge where dir a/b-2 changed to symlink' ' git tag test2 ' -test_expect_success 'merge should not have conflicts (resolve)' ' +test_expect_success SYMLINKS 'merge should not have D/F conflicts (resolve)' ' git reset --hard && git checkout baseline^0 && git merge -s resolve test2 && @@ -82,7 +124,7 @@ test_expect_success 'merge should not have conflicts (resolve)' ' test -f a/b/c/d ' -test_expect_failure 'merge should not have conflicts (recursive)' ' +test_expect_success SYMLINKS 'merge should not have D/F conflicts (recursive)' ' git reset --hard && git 
checkout baseline^0 && git merge -s recursive test2 && @@ -90,4 +132,12 @@ test_expect_failure 'merge should not have conflicts (recursive)' ' test -f a/b/c/d ' +test_expect_success SYMLINKS 'merge should not have F/D conflicts (recursive)' ' + git reset --hard && + git checkout -b foo test2 && + git merge -s recursive baseline^0 && + test -h a/b-2 && + test -f a/b/c/d +' + test_done diff --git a/t/t6037-merge-ours-theirs.sh b/t/t6037-merge-ours-theirs.sh index 8ab3d61f44..2cf42c73f1 100755 --- a/t/t6037-merge-ours-theirs.sh +++ b/t/t6037-merge-ours-theirs.sh @@ -58,7 +58,7 @@ test_expect_success 'pull with -X' ' git reset --hard master && git pull -s recursive -X ours . side && git reset --hard master && git pull -s recursive -Xtheirs . side && git reset --hard master && git pull -s recursive -X theirs . side && - git reset --hard master && ! git pull -s recursive -X bork . side + git reset --hard master && test_must_fail git pull -s recursive -X bork . side ' test_done diff --git a/t/t6038-merge-text-auto.sh b/t/t6038-merge-text-auto.sh new file mode 100755 index 0000000000..52d0dc4bb8 --- /dev/null +++ b/t/t6038-merge-text-auto.sh @@ -0,0 +1,189 @@ +#!/bin/sh + +test_description='CRLF merge conflict across text=auto change + +* [master] remove .gitattributes + ! [side] add line from b +-- + + [side] add line from b +* [master] remove .gitattributes +* [master^] add line from a +* [master~2] normalize file +*+ [side^] Initial +' + +. 
./test-lib.sh + +test_expect_success setup ' + git config core.autocrlf false && + + echo first line | append_cr >file && + echo first line >control_file && + echo only line >inert_file && + + git add file control_file inert_file && + test_tick && + git commit -m "Initial" && + git tag initial && + git branch side && + + echo "* text=auto" >.gitattributes && + touch file && + git add .gitattributes file && + test_tick && + git commit -m "normalize file" && + + echo same line | append_cr >>file && + echo same line >>control_file && + git add file control_file && + test_tick && + git commit -m "add line from a" && + git tag a && + + git rm .gitattributes && + rm file && + git checkout file && + test_tick && + git commit -m "remove .gitattributes" && + git tag c && + + git checkout side && + echo same line | append_cr >>file && + echo same line >>control_file && + git add file control_file && + test_tick && + git commit -m "add line from b" && + git tag b && + + git checkout master +' + +test_expect_success 'set up fuzz_conflict() helper' ' + fuzz_conflict() { + sed -e "s/^\([<>=]......\) .*/\1/" "$@" + } +' + +test_expect_success 'Merge after setting text=auto' ' + cat <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . && + rm -f .gitattributes && + git reset --hard a && + git merge b && + test_cmp expected file +' + +test_expect_success 'Merge addition of text=auto' ' + cat <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . 
&& + rm -f .gitattributes && + git reset --hard b && + git merge a && + test_cmp expected file +' + +test_expect_success 'Detect CRLF/LF conflict after setting text=auto' ' + q_to_cr <<-\EOF >expected && + <<<<<<< + first line + same line + ======= + first lineQ + same lineQ + >>>>>>> + EOF + + git config merge.renormalize false && + rm -f .gitattributes && + git reset --hard a && + test_must_fail git merge b && + fuzz_conflict file >file.fuzzy && + test_cmp expected file.fuzzy +' + +test_expect_success 'Detect LF/CRLF conflict from addition of text=auto' ' + q_to_cr <<-\EOF >expected && + <<<<<<< + first lineQ + same lineQ + ======= + first line + same line + >>>>>>> + EOF + + git config merge.renormalize false && + rm -f .gitattributes && + git reset --hard b && + test_must_fail git merge a && + fuzz_conflict file >file.fuzzy && + test_cmp expected file.fuzzy +' + +test_expect_failure 'checkout -m after setting text=auto' ' + cat <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . && + rm -f .gitattributes && + git reset --hard initial && + git checkout a -- . && + git checkout -m b && + test_cmp expected file +' + +test_expect_failure 'checkout -m addition of text=auto' ' + cat <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . && + rm -f .gitattributes file && + git reset --hard initial && + git checkout b -- . && + git checkout -m a && + test_cmp expected file +' + +test_expect_failure 'cherry-pick patch from after text=auto was added' ' + append_cr <<-\EOF >expected && + first line + same line + EOF + + git config merge.renormalize true && + git rm -fr . && + git reset --hard b && + test_must_fail git cherry-pick a >err 2>&1 && + grep "[Nn]othing added" err && + test_cmp expected file +' + +test_expect_success 'Test delete/normalize conflict' ' + git checkout -f side && + git rm -fr . 
&& + rm -f .gitattributes && + git reset --hard initial && + git rm file && + git commit -m "remove file" && + git checkout master && + git reset --hard a^ && + git merge side +' + +test_done diff --git a/t/t6050-replace.sh b/t/t6050-replace.sh index 4185b7ca1d..dd917d76da 100755 --- a/t/t6050-replace.sh +++ b/t/t6050-replace.sh @@ -209,7 +209,7 @@ test_expect_success 'fetch branch with replacement' ' test_expect_success 'bisect and replacements' ' git bisect start $HASH7 $HASH1 && - test "$S" = "$(git rev-parse --verify HEAD)" && + test "$PARA3" = "$(git rev-parse --verify HEAD)" && git bisect reset && GIT_NO_REPLACE_OBJECTS=1 git bisect start $HASH7 $HASH1 && test "$HASH4" = "$(git rev-parse --verify HEAD)" && diff --git a/t/t6200-fmt-merge-msg.sh b/t/t6200-fmt-merge-msg.sh index 42f8ece097..71f6cad3c2 100755 --- a/t/t6200-fmt-merge-msg.sh +++ b/t/t6200-fmt-merge-msg.sh @@ -70,14 +70,13 @@ test_expect_success setup ' i=$(($i+1)) done && - git show-branch -' + git show-branch && -cat >expected <<\EOF -Merge branch 'left' -EOF + apos="'\''" +' -test_expect_success 'merge-msg test #1' ' +test_expect_success 'message for merging local branch' ' + echo "Merge branch ${apos}left${apos}" >expected && git checkout master && git fetch . 
left && @@ -86,11 +85,8 @@ test_expect_success 'merge-msg test #1' ' test_cmp expected actual ' -cat >expected <<EOF -Merge branch 'left' of $(pwd) -EOF - -test_expect_success 'merge-msg test #2' ' +test_expect_success 'message for merging external branch' ' + echo "Merge branch ${apos}left${apos} of $(pwd)" >expected && git checkout master && git fetch "$(pwd)" left && @@ -99,139 +95,140 @@ test_expect_success 'merge-msg test #2' ' test_cmp expected actual ' -cat >expected <<\EOF -Merge branch 'left' - -* left: - Left #5 - Left #4 - Left #3 - Common #2 - Common #1 -EOF +test_expect_success '[merge] summary/log configuration' ' + cat >expected <<-EOF && + Merge branch ${apos}left${apos} -test_expect_success 'merge-msg test #3-1' ' + * left: + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + EOF - git config --unset-all merge.log - git config --unset-all merge.summary git config merge.log true && + test_might_fail git config --unset-all merge.summary && git checkout master && test_tick && git fetch . left && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -test_expect_success 'merge-msg test #3-2' ' + git fmt-merge-msg <.git/FETCH_HEAD >actual1 && - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary true && git checkout master && test_tick && git fetch . 
left && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -cat >expected <<\EOF -Merge branches 'left' and 'right' + git fmt-merge-msg <.git/FETCH_HEAD >actual2 && -* left: - Left #5 - Left #4 - Left #3 - Common #2 - Common #1 + test_cmp expected actual1 && + test_cmp expected actual2 +' -* right: - Right #5 - Right #4 - Right #3 - Common #2 - Common #1 -EOF +test_expect_success 'fmt-merge-msg -m' ' + echo "Sync with left" >expected && + cat >expected.log <<-EOF && + Sync with left + + * ${apos}left${apos} of $(pwd): + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + EOF + + test_might_fail git config --unset merge.log && + test_might_fail git config --unset merge.summary && + git checkout master && + git fetch "$(pwd)" left && + git fmt-merge-msg -m "Sync with left" <.git/FETCH_HEAD >actual && + git fmt-merge-msg --log -m "Sync with left" \ + <.git/FETCH_HEAD >actual.log && + git config merge.log true && + git fmt-merge-msg -m "Sync with left" \ + <.git/FETCH_HEAD >actual.log-config && + git fmt-merge-msg --no-log -m "Sync with left" \ + <.git/FETCH_HEAD >actual.nolog && + + test_cmp expected actual && + test_cmp expected.log actual.log && + test_cmp expected.log actual.log-config && + test_cmp expected actual.nolog +' -test_expect_success 'merge-msg test #4-1' ' +test_expect_success 'setup: expected shortlog for two branches' ' + cat >expected <<-EOF + Merge branches ${apos}left${apos} and ${apos}right${apos} + + * left: + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + + * right: + Right #5 + Right #4 + Right #3 + Common #2 + Common #1 + EOF +' - git config --unset-all merge.log - git config --unset-all merge.summary +test_expect_success 'shortlog for two branches' ' git config merge.log true && - + test_might_fail git config --unset-all merge.summary && git checkout master && test_tick && git fetch . 
left right && + git fmt-merge-msg <.git/FETCH_HEAD >actual1 && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -test_expect_success 'merge-msg test #4-2' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary true && - git checkout master && test_tick && git fetch . left right && + git fmt-merge-msg <.git/FETCH_HEAD >actual2 && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -test_expect_success 'merge-msg test #5-1' ' - - git config --unset-all merge.log - git config --unset-all merge.summary git config merge.log yes && - + test_might_fail git config --unset-all merge.summary && git checkout master && test_tick && git fetch . left right && + git fmt-merge-msg <.git/FETCH_HEAD >actual3 && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual -' - -test_expect_success 'merge-msg test #5-2' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary yes && - git checkout master && test_tick && git fetch . left right && + git fmt-merge-msg <.git/FETCH_HEAD >actual4 && - git fmt-merge-msg <.git/FETCH_HEAD >actual && - test_cmp expected actual + test_cmp expected actual1 && + test_cmp expected actual2 && + test_cmp expected actual3 && + test_cmp expected actual4 ' test_expect_success 'merge-msg -F' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary yes && - git checkout master && test_tick && git fetch . 
left right && - git fmt-merge-msg -F .git/FETCH_HEAD >actual && test_cmp expected actual ' test_expect_success 'merge-msg -F in subdirectory' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary yes && - git checkout master && test_tick && git fetch . left right && @@ -245,11 +242,11 @@ test_expect_success 'merge-msg -F in subdirectory' ' ' test_expect_success 'merge-msg with nothing to merge' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + test_might_fail git config --unset-all merge.log && git config merge.summary yes && + >empty && + ( cd remote && git checkout -b unrelated && @@ -258,22 +255,20 @@ test_expect_success 'merge-msg with nothing to merge' ' git fmt-merge-msg <.git/FETCH_HEAD >../actual ) && - test_cmp /dev/null actual + test_cmp empty actual ' -cat >expected <<\EOF -Merge tag 'tag-r3' - -* tag 'tag-r3': - Right #3 - Common #2 - Common #1 -EOF - test_expect_success 'merge-msg tag' ' + cat >expected <<-EOF && + Merge tag ${apos}tag-r3${apos} - git config --unset-all merge.log - git config --unset-all merge.summary + * tag ${apos}tag-r3${apos}: + Right #3 + Common #2 + Common #1 + EOF + + test_might_fail git config --unset-all merge.log && git config merge.summary yes && git checkout master && @@ -284,26 +279,24 @@ test_expect_success 'merge-msg tag' ' test_cmp expected actual ' -cat >expected <<\EOF -Merge tags 'tag-r3' and 'tag-l5' - -* tag 'tag-r3': - Right #3 - Common #2 - Common #1 - -* tag 'tag-l5': - Left #5 - Left #4 - Left #3 - Common #2 - Common #1 -EOF - test_expect_success 'merge-msg two tags' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + cat >expected <<-EOF && + Merge tags ${apos}tag-r3${apos} and ${apos}tag-l5${apos} + + * tag ${apos}tag-r3${apos}: + Right #3 + Common #2 + Common #1 + + * tag ${apos}tag-l5${apos}: + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + 
EOF + + test_might_fail git config --unset-all merge.log && git config merge.summary yes && git checkout master && @@ -314,26 +307,24 @@ test_expect_success 'merge-msg two tags' ' test_cmp expected actual ' -cat >expected <<\EOF -Merge branch 'left', tag 'tag-r3' - -* tag 'tag-r3': - Right #3 - Common #2 - Common #1 - -* left: - Left #5 - Left #4 - Left #3 - Common #2 - Common #1 -EOF - test_expect_success 'merge-msg tag and branch' ' - - git config --unset-all merge.log - git config --unset-all merge.summary + cat >expected <<-EOF && + Merge branch ${apos}left${apos}, tag ${apos}tag-r3${apos} + + * tag ${apos}tag-r3${apos}: + Right #3 + Common #2 + Common #1 + + * left: + Left #5 + Left #4 + Left #3 + Common #2 + Common #1 + EOF + + test_might_fail git config --unset-all merge.log && git config merge.summary yes && git checkout master && @@ -344,26 +335,27 @@ test_expect_success 'merge-msg tag and branch' ' test_cmp expected actual ' -cat >expected <<\EOF -Merge branch 'long' - -* long: (35 commits) -EOF - test_expect_success 'merge-msg lots of commits' ' + { + cat <<-EOF && + Merge branch ${apos}long${apos} + + * long: (35 commits) + EOF + + i=29 && + while test $i -gt 9 + do + echo " $i" && + i=$(($i-1)) + done && + echo " ..." + } >expected && git checkout master && test_tick && git fetch . long && - i=29 && - while test $i -gt 9 - do - echo " $i" && - i=$(($i-1)) - done >>expected && - echo " ..." >>expected - git fmt-merge-msg <.git/FETCH_HEAD >actual && test_cmp expected actual ' diff --git a/t/t7005-editor.sh b/t/t7005-editor.sh index 26ddf9d496..1b530b5022 100755 --- a/t/t7005-editor.sh +++ b/t/t7005-editor.sh @@ -111,13 +111,13 @@ do ' done -if ! 
echo 'echo space > "$1"' > "e space.sh" +if echo 'echo space > "$1"' > "e space.sh" then - skip_all="Skipping; FS does not support spaces in filenames" - test_done + # FS supports spaces in filenames + test_set_prereq SPACES_IN_FILENAMES fi -test_expect_success 'editor with a space' ' +test_expect_success SPACES_IN_FILENAMES 'editor with a space' ' chmod a+x "e space.sh" && GIT_EDITOR="./e\ space.sh" git commit --amend && @@ -126,7 +126,7 @@ test_expect_success 'editor with a space' ' ' unset GIT_EDITOR -test_expect_success 'core.editor with a space' ' +test_expect_success SPACES_IN_FILENAMES 'core.editor with a space' ' git config core.editor \"./e\ space.sh\" && git commit --amend && diff --git a/t/t7006-pager.sh b/t/t7006-pager.sh index 71d3ceff8f..fb744e3c4a 100755 --- a/t/t7006-pager.sh +++ b/t/t7006-pager.sh @@ -58,6 +58,21 @@ test_expect_success TTY 'some commands use a pager' ' test -e paginated.out ' +test_expect_failure TTY 'pager runs from subdir' ' + echo subdir/paginated.out >expected && + mkdir -p subdir && + rm -f paginated.out subdir/paginated.out && + ( + cd subdir && + test_terminal git log + ) && + { + ls paginated.out subdir/paginated.out || + : + } >actual && + test_cmp expected actual +' + test_expect_success TTY 'some commands do not use a pager' ' rm -f paginated.out || cleanup_fail && @@ -106,6 +121,45 @@ test_expect_success TTY 'no pager with --no-pager' ' ! test -e paginated.out ' +test_expect_success TTY 'configuration can disable pager' ' + rm -f paginated.out && + test_might_fail git config --unset pager.grep && + test_terminal git grep initial && + test -e paginated.out && + + rm -f paginated.out && + git config pager.grep false && + test_when_finished "git config --unset pager.grep" && + test_terminal git grep initial && + ! 
test -e paginated.out +' + +test_expect_success TTY 'git config uses a pager if configured to' ' + rm -f paginated.out && + git config pager.config true && + test_when_finished "git config --unset pager.config" && + test_terminal git config --list && + test -e paginated.out +' + +test_expect_success TTY 'configuration can enable pager (from subdir)' ' + rm -f paginated.out && + mkdir -p subdir && + git config pager.bundle true && + test_when_finished "git config --unset pager.bundle" && + + git bundle create test.bundle --all && + rm -f paginated.out subdir/paginated.out && + ( + cd subdir && + test_terminal git bundle unbundle ../test.bundle + ) && + { + test -e paginated.out || + test -e subdir/paginated.out + } +' + # A colored commit log will begin with an appropriate ANSI escape # for the first color; the text "commit" comes later. colorful() { @@ -369,4 +423,16 @@ test_GIT_PAGER_overrides expect_success test_must_fail 'git -p' test_doesnt_paginate expect_failure test_must_fail 'git -p nonsense' +test_pager_choices 'git shortlog' +test_expect_success 'setup: configure shortlog not to paginate' ' + git config pager.shortlog false +' +test_doesnt_paginate expect_success 'git shortlog' +test_no_local_config_subdir expect_success 'git shortlog' +test_default_pager expect_success 'git -p shortlog' +test_core_pager_subdir expect_success 'git -p shortlog' + +test_core_pager_subdir expect_success test_must_fail \ + 'git -p apply </dev/null' + test_done diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index eb8ca88cce..c0f9f3f705 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -61,7 +61,7 @@ test_expect_success 'git grep -Fi iLE a' ' # This test actually passes on platforms where regexec() supports the # flag REG_STARTEND. 
-test_expect_failure 'git grep ile a' ' +test_expect_success 'git grep ile a' ' git grep ile a ' diff --git a/t/t7105-reset-patch.sh b/t/t7105-reset-patch.sh index c1f4fc3c65..9891e2c1f5 100755 --- a/t/t7105-reset-patch.sh +++ b/t/t7105-reset-patch.sh @@ -3,7 +3,7 @@ test_description='git reset --patch' . ./lib-patch-mode.sh -test_expect_success 'setup' ' +test_expect_success PERL 'setup' ' mkdir dir && echo parent > dir/foo && echo dummy > bar && @@ -17,20 +17,20 @@ test_expect_success 'setup' ' # note: bar sorts before foo, so the first 'n' is always to skip 'bar' -test_expect_success 'saying "n" does nothing' ' +test_expect_success PERL 'saying "n" does nothing' ' set_and_save_state dir/foo work work (echo n; echo n) | git reset -p && verify_saved_state dir/foo && verify_saved_state bar ' -test_expect_success 'git reset -p' ' +test_expect_success PERL 'git reset -p' ' (echo n; echo y) | git reset -p && verify_state dir/foo work head && verify_saved_state bar ' -test_expect_success 'git reset -p HEAD^' ' +test_expect_success PERL 'git reset -p HEAD^' ' (echo n; echo y) | git reset -p HEAD^ && verify_state dir/foo work parent && verify_saved_state bar @@ -41,27 +41,27 @@ test_expect_success 'git reset -p HEAD^' ' # dir/foo. There's always an extra 'n' to reject edits to dir/foo in # the failure case (and thus get out of the loop). 
-test_expect_success 'git reset -p dir' ' +test_expect_success PERL 'git reset -p dir' ' set_state dir/foo work work (echo y; echo n) | git reset -p dir && verify_state dir/foo work head && verify_saved_state bar ' -test_expect_success 'git reset -p -- foo (inside dir)' ' +test_expect_success PERL 'git reset -p -- foo (inside dir)' ' set_state dir/foo work work (echo y; echo n) | (cd dir && git reset -p -- foo) && verify_state dir/foo work head && verify_saved_state bar ' -test_expect_success 'git reset -p HEAD^ -- dir' ' +test_expect_success PERL 'git reset -p HEAD^ -- dir' ' (echo y; echo n) | git reset -p HEAD^ -- dir && verify_state dir/foo work parent && verify_saved_state bar ' -test_expect_success 'none of this moved HEAD' ' +test_expect_success PERL 'none of this moved HEAD' ' verify_saved_head ' diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 7d8ed68bef..7dbbea13ac 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -388,16 +388,15 @@ test_expect_success 'core.excludesfile' ' ' -test_expect_success 'removal failure' ' +test_expect_success SANITY 'removal failure' ' mkdir foo && touch foo/bar && (exec <foo/bar && chmod 0 foo && - test_must_fail git clean -f -d) - + test_must_fail git clean -f -d && + chmod 755 foo) ' -chmod 755 foo test_expect_success 'nested git work tree' ' rm -fr foo bar && @@ -438,4 +437,20 @@ test_expect_success 'force removal of nested git work tree' ' ! test -d bar ' +test_expect_success 'git clean -e' ' + rm -fr repo && + mkdir repo && + ( + cd repo && + git init && + touch 1 2 3 known && + git add known && + git clean -f -e 1 -e 2 && + test -e 1 && + test -e 2 && + ! 
(test -e 3) && + test -e known + ) +' + test_done diff --git a/t/t7405-submodule-merge.sh b/t/t7405-submodule-merge.sh index 4a7b8933f4..7e2e258950 100755 --- a/t/t7405-submodule-merge.sh +++ b/t/t7405-submodule-merge.sh @@ -54,21 +54,132 @@ test_expect_success setup ' git merge -s ours a ' -test_expect_success 'merging with modify/modify conflict' ' +# History setup +# +# b +# / \ +# a d +# \ / +# c +# +# a in the main repository records to sub-a in the submodule and +# analogous b and c. d should be automatically found by merging c into +# b in the main repository. +test_expect_success 'setup for merge search' ' + mkdir merge-search && + (cd merge-search && + git init && + mkdir sub && + (cd sub && + git init && + echo "file-a" > file-a && + git add file-a && + git commit -m "sub-a" && + git branch sub-a) && + git add sub && + git commit -m "a" && + git branch a && + + git checkout -b b && + (cd sub && + git checkout -b sub-b && + echo "file-b" > file-b && + git add file-b && + git commit -m "sub-b") && + git commit -a -m "b" && + + git checkout -b c a && + (cd sub && + git checkout -b sub-c sub-a && + echo "file-c" > file-c && + git add file-c && + git commit -m "sub-c") && + git commit -a -m "c" && - git checkout -b test1 a && - test_must_fail git merge b && - test -f .git/MERGE_MSG && - git diff && - test -n "$(git ls-files -u)" + git checkout -b d a && + (cd sub && + git checkout -b sub-d sub-b && + git merge sub-c) && + git commit -a -m "d" && + git branch test b) ' -test_expect_success 'merging with a modify/modify conflict between merge bases' ' +test_expect_success 'merge with one side as a fast-forward of the other' ' + (cd merge-search && + git checkout -b test-forward b && + git merge d && + git ls-tree test-forward sub | cut -f1 | cut -f3 -d" " > actual && + (cd sub && + git rev-parse sub-d > ../expect) && + test_cmp actual expect) +' +test_expect_success 'merging should conflict for non fast-forward' ' + (cd merge-search && + git checkout -b 
test-nonforward b && + (cd sub && + git rev-parse sub-d > ../expect) && + test_must_fail git merge c 2> actual && + grep $(cat expect) actual > /dev/null && + git reset --hard) +' + +test_expect_success 'merging should fail for ambiguous common parent' ' + (cd merge-search && + git checkout -b test-ambiguous b && + (cd sub && + git checkout -b ambiguous sub-b && + git merge sub-c && + git rev-parse sub-d > ../expect1 && + git rev-parse ambiguous > ../expect2) && + test_must_fail git merge c 2> actual && + grep $(cat expect1) actual > /dev/null && + grep $(cat expect2) actual > /dev/null && + git reset --hard) +' + +# in a situation like this +# +# submodule tree: +# +# sub-a --- sub-b --- sub-d +# +# main tree: +# +# e (sub-a) +# / +# bb (sub-b) +# \ +# f (sub-d) +# +# A merge between e and f should fail because one of the submodule +# commits (sub-a) does not descend from the submodule merge-base (sub-b). +# +test_expect_success 'merging should fail for changes that are backwards' ' + (cd merge-search && + git checkout -b bb a && + (cd sub && + git checkout sub-b) && + git commit -a -m "bb" && + + git checkout -b e bb && + (cd sub && + git checkout sub-a) && + git commit -a -m "e" && + + git checkout -b f bb && + (cd sub && + git checkout sub-d) && + git commit -a -m "f" && + + git checkout -b test-backward e && + test_must_fail git merge f) +' + +test_expect_success 'merging with a modify/modify conflict between merge bases' ' git reset --hard HEAD && git checkout -b test2 c && git merge d - ' test_done diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index 1382a8e58a..bfb4975e94 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -25,7 +25,7 @@ test_expect_success 'setup a submodule tree' ' echo file > file && git add file && test_tick && - git commit -m upstream + git commit -m upstream && git clone . 
super && git clone super submodule && git clone super rebasing && diff --git a/t/t7407-submodule-foreach.sh b/t/t7407-submodule-foreach.sh index db9365b645..905a8baae9 100755 --- a/t/t7407-submodule-foreach.sh +++ b/t/t7407-submodule-foreach.sh @@ -16,7 +16,7 @@ test_expect_success 'setup a submodule tree' ' echo file > file && git add file && test_tick && - git commit -m upstream + git commit -m upstream && git clone . super && git clone super submodule && ( @@ -30,7 +30,7 @@ test_expect_success 'setup a submodule tree' ' submodule.sub2 submodule.foo2 && git config -f .gitmodules --rename-section \ submodule.sub3 submodule.foo3 && - git add .gitmodules + git add .gitmodules && test_tick && git commit -m "submodules" && git submodule init sub1 && diff --git a/t/t7508-status.sh b/t/t7508-status.sh index a72fe3ae64..c9300f3c8b 100755 --- a/t/t7508-status.sh +++ b/t/t7508-status.sh @@ -793,7 +793,7 @@ test_expect_success 'commit --dry-run submodule summary (--amend)' ' test_cmp expect output ' -test_expect_success POSIXPERM 'status succeeds in a read-only repository' ' +test_expect_success POSIXPERM,SANITY 'status succeeds in a read-only repository' ' ( chmod a-w .git && # make dir1/tracked stat-dirty @@ -808,24 +808,38 @@ test_expect_success POSIXPERM 'status succeeds in a read-only repository' ' (exit $status) ' +(cd sm && echo > bar && git add bar && git commit -q -m 'Add bar') && git add sm +new_head=$(cd sm && git rev-parse --short=7 --verify HEAD) +touch .gitmodules + cat > expect << EOF # On branch master +# Changes to be committed: +# (use "git reset HEAD <file>..." to unstage) +# +# modified: sm +# # Changed but not updated: # (use "git add <file>..." to update what will be committed) # (use "git checkout -- <file>..." to discard changes in working directory) # # modified: dir1/modified # +# Submodule changes to be committed: +# +# * sm $head...$new_head (1): +# > Add bar +# # Untracked files: # (use "git add <file>..." 
to include in what will be committed) # +# .gitmodules # dir1/untracked # dir2/modified # dir2/untracked # expect # output # untracked -no changes added to commit (use "git add" and/or "git commit -a") EOF test_expect_success '--ignore-submodules=untracked suppresses submodules with untracked content' ' @@ -834,19 +848,89 @@ test_expect_success '--ignore-submodules=untracked suppresses submodules with un test_cmp expect output ' +test_expect_success '.gitmodules ignore=untracked suppresses submodules with untracked content' ' + git config diff.ignoreSubmodules dirty && + git status >output && + test_cmp expect output && + git config --add -f .gitmodules submodule.subname.ignore untracked && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname && + git config --unset diff.ignoreSubmodules +' + +test_expect_success '.git/config ignore=untracked suppresses submodules with untracked content' ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore untracked && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config --remove-section -f .gitmodules submodule.subname +' + test_expect_success '--ignore-submodules=dirty suppresses submodules with untracked content' ' git status --ignore-submodules=dirty > output && test_cmp expect output ' +test_expect_success '.gitmodules ignore=dirty suppresses submodules with untracked content' ' + git config diff.ignoreSubmodules dirty && + git status >output && + ! 
test -s actual && + git config --add -f .gitmodules submodule.subname.ignore dirty && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname && + git config --unset diff.ignoreSubmodules +' + +test_expect_success '.git/config ignore=dirty suppresses submodules with untracked content' ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore dirty && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + test_expect_success '--ignore-submodules=dirty suppresses submodules with modified content' ' echo modified > sm/foo && git status --ignore-submodules=dirty > output && test_cmp expect output ' +test_expect_success '.gitmodules ignore=dirty suppresses submodules with modified content' ' + git config --add -f .gitmodules submodule.subname.ignore dirty && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_success '.git/config ignore=dirty suppresses submodules with modified content' ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore dirty && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + cat > expect << EOF # On branch master +# Changes to be committed: +# (use "git reset HEAD <file>..." 
to unstage) +# +# modified: sm +# # Changed but not updated: # (use "git add <file>..." to update what will be committed) # (use "git checkout -- <file>..." to discard changes in working directory) @@ -855,16 +939,21 @@ cat > expect << EOF # modified: dir1/modified # modified: sm (modified content) # +# Submodule changes to be committed: +# +# * sm $head...$new_head (1): +# > Add bar +# # Untracked files: # (use "git add <file>..." to include in what will be committed) # +# .gitmodules # dir1/untracked # dir2/modified # dir2/untracked # expect # output # untracked -no changes added to commit (use "git add" and/or "git commit -a") EOF test_expect_success "--ignore-submodules=untracked doesn't suppress submodules with modified content" ' @@ -872,10 +961,34 @@ test_expect_success "--ignore-submodules=untracked doesn't suppress submodules w test_cmp expect output ' +test_expect_success ".gitmodules ignore=untracked doesn't suppress submodules with modified content" ' + git config --add -f .gitmodules submodule.subname.ignore untracked && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_success ".git/config ignore=untracked doesn't suppress submodules with modified content" ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore untracked && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + head2=$(cd sm && git commit -q -m "2nd commit" foo && git rev-parse --short=7 --verify HEAD) cat > expect << EOF # On branch master +# Changes to be committed: +# (use "git reset HEAD <file>..." 
to unstage) +# +# modified: sm +# # Changed but not updated: # (use "git add <file>..." to update what will be committed) # (use "git checkout -- <file>..." to discard changes in working directory) @@ -883,21 +996,26 @@ cat > expect << EOF # modified: dir1/modified # modified: sm (new commits) # +# Submodule changes to be committed: +# +# * sm $head...$new_head (1): +# > Add bar +# # Submodules changed but not updated: # -# * sm $head...$head2 (1): +# * sm $new_head...$head2 (1): # > 2nd commit # # Untracked files: # (use "git add <file>..." to include in what will be committed) # +# .gitmodules # dir1/untracked # dir2/modified # dir2/untracked # expect # output # untracked -no changes added to commit (use "git add" and/or "git commit -a") EOF test_expect_success "--ignore-submodules=untracked doesn't suppress submodule summary" ' @@ -905,10 +1023,47 @@ test_expect_success "--ignore-submodules=untracked doesn't suppress submodule su test_cmp expect output ' +test_expect_success ".gitmodules ignore=untracked doesn't suppress submodule summary" ' + git config --add -f .gitmodules submodule.subname.ignore untracked && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_success ".git/config ignore=untracked doesn't suppress submodule summary" ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore untracked && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + test_expect_success "--ignore-submodules=dirty doesn't suppress submodule summary" ' git status --ignore-submodules=dirty > output && test_cmp expect output ' +test_expect_success 
".gitmodules ignore=dirty doesn't suppress submodule summary" ' + git config --add -f .gitmodules submodule.subname.ignore dirty && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_success ".git/config ignore=dirty doesn't suppress submodule summary" ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore dirty && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' cat > expect << EOF # On branch master @@ -921,6 +1076,7 @@ cat > expect << EOF # Untracked files: # (use "git add <file>..." to include in what will be committed) # +# .gitmodules # dir1/untracked # dir2/modified # dir2/untracked @@ -935,4 +1091,23 @@ test_expect_success "--ignore-submodules=all suppresses submodule summary" ' test_cmp expect output ' +test_expect_failure '.gitmodules ignore=all suppresses submodule summary' ' + git config --add -f .gitmodules submodule.subname.ignore all && + git config --add -f .gitmodules submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config -f .gitmodules --remove-section submodule.subname +' + +test_expect_failure '.git/config ignore=all suppresses submodule summary' ' + git config --add -f .gitmodules submodule.subname.ignore none && + git config --add -f .gitmodules submodule.subname.path sm && + git config --add submodule.subname.ignore all && + git config --add submodule.subname.path sm && + git status > output && + test_cmp expect output && + git config --remove-section submodule.subname && + git config -f .gitmodules --remove-section submodule.subname +' + test_done diff --git 
a/t/t7509-commit.sh b/t/t7509-commit.sh index 3ea33db6c7..643ab03f99 100755 --- a/t/t7509-commit.sh +++ b/t/t7509-commit.sh @@ -111,7 +111,7 @@ test_expect_success '--amend option with empty author' ' test_when_finished "git checkout Initial" && echo "Empty author test" >>foo && test_tick && - ! git commit -a -m "empty author" --amend 2>err && + test_must_fail git commit -a -m "empty author" --amend 2>err && grep "empty ident" err ' @@ -125,7 +125,7 @@ test_expect_success '--amend option with missing author' ' test_when_finished "git checkout Initial" && echo "Missing author test" >>foo && test_tick && - ! git commit -a -m "malformed author" --amend 2>err && + test_must_fail git commit -a -m "malformed author" --amend 2>err && grep "empty ident" err ' diff --git a/t/t7600-merge.sh b/t/t7600-merge.sh index cde8390c1b..b4f40e4c3a 100755 --- a/t/t7600-merge.sh +++ b/t/t7600-merge.sh @@ -5,189 +5,103 @@ test_description='git merge -Testing basic merge operations/option parsing.' +Testing basic merge operations/option parsing. + +! [c0] commit 0 + ! [c1] commit 1 + ! [c2] commit 2 + ! [c3] commit 3 + ! [c4] c4 + ! [c5] c5 + ! [c6] c6 + * [master] Merge commit 'c1' +-------- + - [master] Merge commit 'c1' + + * [c1] commit 1 + + [c6] c6 + + [c5] c5 + ++ [c4] c4 + ++++ [c3] commit 3 + + [c2] commit 2 ++++++++* [c0] commit 0 +' . 
./test-lib.sh -cat >file <<EOF -1 -2 -3 -4 -5 -6 -7 -8 -9 -EOF - -cat >file.1 <<EOF -1 X -2 -3 -4 -5 -6 -7 -8 -9 -EOF - -cat >file.5 <<EOF -1 -2 -3 -4 -5 X -6 -7 -8 -9 -EOF - -cat >file.9 <<EOF -1 -2 -3 -4 -5 -6 -7 -8 -9 X -EOF - -cat >result.1 <<EOF -1 X -2 -3 -4 -5 -6 -7 -8 -9 -EOF - -cat >result.1-5 <<EOF -1 X -2 -3 -4 -5 X -6 -7 -8 -9 -EOF - -cat >result.1-5-9 <<EOF -1 X -2 -3 -4 -5 X -6 -7 -8 -9 X -EOF - -create_merge_msgs() { - echo "Merge commit 'c2'" >msg.1-5 && - echo "Merge commit 'c2'; commit 'c3'" >msg.1-5-9 && - echo "Squashed commit of the following:" >squash.1 && - echo >>squash.1 && - git log --no-merges ^HEAD c1 >>squash.1 && - echo "Squashed commit of the following:" >squash.1-5 && - echo >>squash.1-5 && - git log --no-merges ^HEAD c2 >>squash.1-5 && - echo "Squashed commit of the following:" >squash.1-5-9 && - echo >>squash.1-5-9 && - git log --no-merges ^HEAD c2 c3 >>squash.1-5-9 && - echo > msg.nolog && - echo "* commit 'c3':" >msg.log && - echo " commit 3" >>msg.log && - echo >>msg.log -} - -verify_diff() { - if ! 
test_cmp "$1" "$2" - then - echo "$3" - false - fi -} - -verify_merge() { - verify_diff "$2" "$1" "[OOPS] bad merge result" && - if test $(git ls-files -u | wc -l) -gt 0 - then - echo "[OOPS] unmerged files" - false - fi && - if test_must_fail git diff --exit-code - then - echo "[OOPS] working tree != index" - false - fi && - if test -n "$3" - then - git show -s --pretty=format:%s HEAD >msg.act && - verify_diff "$3" msg.act "[OOPS] bad merge message" - fi -} - -verify_head() { - if test "$1" != "$(git rev-parse HEAD)" - then - echo "[OOPS] HEAD != $1" - false - fi -} - -verify_parents() { - i=1 - while test $# -gt 0 - do - if test "$1" != "$(git rev-parse HEAD^$i)" +test_expect_success 'set up test data and helpers' ' + printf "%s\n" 1 2 3 4 5 6 7 8 9 >file && + printf "%s\n" "1 X" 2 3 4 5 6 7 8 9 >file.1 && + printf "%s\n" 1 2 3 4 "5 X" 6 7 8 9 >file.5 && + printf "%s\n" 1 2 3 4 5 6 7 8 "9 X" >file.9 && + printf "%s\n" "1 X" 2 3 4 5 6 7 8 9 >result.1 && + printf "%s\n" "1 X" 2 3 4 "5 X" 6 7 8 9 >result.1-5 && + printf "%s\n" "1 X" 2 3 4 "5 X" 6 7 8 "9 X" >result.1-5-9 && + + create_merge_msgs() { + echo "Merge commit '\''c2'\''" >msg.1-5 && + echo "Merge commit '\''c2'\''; commit '\''c3'\''" >msg.1-5-9 && + { + echo "Squashed commit of the following:" && + echo && + git log --no-merges ^HEAD c1 + } >squash.1 && + { + echo "Squashed commit of the following:" && + echo && + git log --no-merges ^HEAD c2 + } >squash.1-5 && + { + echo "Squashed commit of the following:" && + echo && + git log --no-merges ^HEAD c2 c3 + } >squash.1-5-9 && + echo >msg.nolog && + { + echo "* commit '\''c3'\'':" && + echo " commit 3" && + echo + } >msg.log + } && + + verify_merge() { + test_cmp "$2" "$1" && + git update-index --refresh && + git diff --exit-code && + if test -n "$3" then - echo "[OOPS] HEAD^$i != $1" - return 1 + git show -s --pretty=format:%s HEAD >msg.act && + test_cmp "$3" msg.act fi - i=$(expr $i + 1) - shift - done -} - -verify_mergeheads() { - i=1 - if ! 
test -f .git/MERGE_HEAD - then - echo "[OOPS] MERGE_HEAD is missing" - false - fi && - while test $# -gt 0 - do - head=$(head -n $i .git/MERGE_HEAD | sed -ne \$p) - if test "$1" != "$head" - then - echo "[OOPS] MERGE_HEAD $i != $1" + } && + + verify_head() { + echo "$1" >head.expected && + git rev-parse HEAD >head.actual && + test_cmp head.expected head.actual + } && + + verify_parents() { + printf "%s\n" "$@" >parents.expected && + >parents.actual && + i=1 && + while test $i -le $# + do + git rev-parse HEAD^$i >>parents.actual && + i=$(expr $i + 1) || return 1 - fi - i=$(expr $i + 1) - shift - done -} + done && + test_cmp parents.expected parents.actual + } && -verify_no_mergehead() { - if test -f .git/MERGE_HEAD - then - echo "[OOPS] MERGE_HEAD exists" - false - fi -} + verify_mergeheads() { + printf "%s\n" "$@" >mergehead.expected && + test_cmp mergehead.expected .git/MERGE_HEAD + } && + verify_no_mergehead() { + ! test -e .git/MERGE_HEAD + } +' test_expect_success 'setup' ' git add file && @@ -219,7 +133,7 @@ test_expect_success 'setup' ' create_merge_msgs ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'test option parsing' ' test_must_fail git merge -$ c1 && @@ -235,13 +149,19 @@ test_expect_success 'reject non-strategy with a git-merge-foo name' ' ' test_expect_success 'merge c0 with c1' ' + echo "OBJID HEAD@{0}: merge c1: Fast-forward" >reflog.expected && + git reset --hard c0 && git merge c1 && verify_merge file result.1 && - verify_head "$c1" + verify_head "$c1" && + + git reflog -1 >reflog.actual && + sed "s/$_x05[0-9a-f]*/OBJID/g" reflog.actual >reflog.fuzzy && + test_cmp reflog.expected reflog.fuzzy ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c0 with c1 with --ff-only' ' git reset --hard c0 && @@ -251,7 +171,28 @@ test_expect_success 'merge c0 with c1 with --ff-only' ' verify_head "$c1" ' -test_debug 'gitk --all' +test_debug 'git 
log --graph --decorate --oneline --all' + +test_expect_success 'merge from unborn branch' ' + git checkout -f master && + test_might_fail git branch -D kid && + + echo "OBJID HEAD@{0}: initial pull" >reflog.expected && + + git checkout --orphan kid && + test_when_finished "git checkout -f master" && + git rm -fr . && + test_tick && + git merge --ff-only c1 && + verify_merge file result.1 && + verify_head "$c1" && + + git reflog -1 >reflog.actual && + sed "s/$_x05[0-9a-f][0-9a-f]/OBJID/g" reflog.actual >reflog.fuzzy && + test_cmp reflog.expected reflog.fuzzy +' + +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2' ' git reset --hard c1 && @@ -261,7 +202,7 @@ test_expect_success 'merge c1 with c2' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 and c3' ' git reset --hard c1 && @@ -271,7 +212,7 @@ test_expect_success 'merge c1 with c2 and c3' ' verify_parents $c1 $c2 $c3 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'failing merges with --ff-only' ' git reset --hard c1 && @@ -288,7 +229,7 @@ test_expect_success 'merge c0 with c1 (no-commit)' ' verify_head $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (no-commit)' ' git reset --hard c1 && @@ -298,7 +239,7 @@ test_expect_success 'merge c1 with c2 (no-commit)' ' verify_mergeheads $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 and c3 (no-commit)' ' git reset --hard c1 && @@ -308,7 +249,7 @@ test_expect_success 'merge c1 with c2 and c3 (no-commit)' ' verify_mergeheads $c2 $c3 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c0 with c1 (squash)' ' git reset --hard c0 && @@ -316,10 +257,10 @@ 
test_expect_success 'merge c0 with c1 (squash)' ' verify_merge file result.1 && verify_head $c0 && verify_no_mergehead && - verify_diff squash.1 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c0 with c1 (squash, ff-only)' ' git reset --hard c0 && @@ -327,10 +268,10 @@ test_expect_success 'merge c0 with c1 (squash, ff-only)' ' verify_merge file result.1 && verify_head $c0 && verify_no_mergehead && - verify_diff squash.1 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (squash)' ' git reset --hard c1 && @@ -338,17 +279,17 @@ test_expect_success 'merge c1 with c2 (squash)' ' verify_merge file result.1-5 && verify_head $c1 && verify_no_mergehead && - verify_diff squash.1-5 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1-5 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'unsuccesful merge of c1 with c2 (squash, ff-only)' ' git reset --hard c1 && test_must_fail git merge --squash --ff-only c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 and c3 (squash)' ' git reset --hard c1 && @@ -356,10 +297,10 @@ test_expect_success 'merge c1 with c2 and c3 (squash)' ' verify_merge file result.1-5-9 && verify_head $c1 && verify_no_mergehead && - verify_diff squash.1-5-9 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1-5-9 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (no-commit in config)' ' git reset --hard c1 && @@ -370,7 +311,7 @@ test_expect_success 'merge c1 with c2 (no-commit in config)' ' verify_mergeheads $c2 ' 
-test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (squash in config)' ' git reset --hard c1 && @@ -379,10 +320,10 @@ test_expect_success 'merge c1 with c2 (squash in config)' ' verify_merge file result.1-5 && verify_head $c1 && verify_no_mergehead && - verify_diff squash.1-5 .git/SQUASH_MSG "[OOPS] bad squash message" + test_cmp squash.1-5 .git/SQUASH_MSG ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'override config option -n with --summary' ' git reset --hard c1 && @@ -412,7 +353,7 @@ test_expect_success 'override config option -n with --stat' ' fi ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'override config option --stat' ' git reset --hard c1 && @@ -428,7 +369,7 @@ test_expect_success 'override config option --stat' ' fi ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (override --no-commit)' ' git reset --hard c1 && @@ -439,7 +380,7 @@ test_expect_success 'merge c1 with c2 (override --no-commit)' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c2 (override --squash)' ' git reset --hard c1 && @@ -450,7 +391,7 @@ test_expect_success 'merge c1 with c2 (override --squash)' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c0 with c1 (no-ff)' ' git reset --hard c0 && @@ -461,7 +402,7 @@ test_expect_success 'merge c0 with c1 (no-ff)' ' verify_parents $c0 $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'combining --squash and --no-ff is refused' ' test_must_fail git merge --squash --no-ff c1 && @@ -485,20 +426,20 @@ test_expect_success 'merge log message' ' git reset --hard 
c0 && git merge --no-log c2 && git show -s --pretty=format:%b HEAD >msg.act && - verify_diff msg.nolog msg.act "[OOPS] bad merge log message" && + test_cmp msg.nolog msg.act && git merge --log c3 && git show -s --pretty=format:%b HEAD >msg.act && - verify_diff msg.log msg.act "[OOPS] bad merge log message" && + test_cmp msg.log msg.act && git reset --hard HEAD^ && git config merge.log yes && git merge c3 && git show -s --pretty=format:%b HEAD >msg.act && - verify_diff msg.log msg.act "[OOPS] bad merge log message" + test_cmp msg.log msg.act ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c0, c2, c0, and c1' ' git reset --hard c1 && @@ -509,7 +450,7 @@ test_expect_success 'merge c1 with c0, c2, c0, and c1' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c0, c2, c0, and c1' ' git reset --hard c1 && @@ -520,7 +461,7 @@ test_expect_success 'merge c1 with c0, c2, c0, and c1' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge c1 with c1 and c2' ' git reset --hard c1 && @@ -531,7 +472,7 @@ test_expect_success 'merge c1 with c1 and c2' ' verify_parents $c1 $c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge fast-forward in a dirty tree' ' git reset --hard c0 && @@ -541,16 +482,16 @@ test_expect_success 'merge fast-forward in a dirty tree' ' git merge c2 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'in-index merge' ' git reset --hard c0 && - git merge --no-ff -s resolve c1 > out && + git merge --no-ff -s resolve c1 >out && grep "Wonderful." 
out && verify_parents $c0 $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'refresh the index before merging' ' git reset --hard c1 && @@ -558,31 +499,39 @@ test_expect_success 'refresh the index before merging' ' git merge c3 ' -cat >expected <<EOF -Merge branch 'c5' (early part) +cat >expected.branch <<\EOF +Merge branch 'c5-branch' (early part) +EOF +cat >expected.tag <<\EOF +Merge commit 'c5~1' EOF test_expect_success 'merge early part of c2' ' git reset --hard c3 && - echo c4 > c4.c && + echo c4 >c4.c && git add c4.c && git commit -m c4 && git tag c4 && - echo c5 > c5.c && + echo c5 >c5.c && git add c5.c && git commit -m c5 && git tag c5 && git reset --hard c3 && - echo c6 > c6.c && + echo c6 >c6.c && git add c6.c && git commit -m c6 && git tag c6 && + git branch -f c5-branch c5 && + git merge c5-branch~1 && + git show -s --pretty=format:%s HEAD >actual.branch && + git reset --keep HEAD^ && git merge c5~1 && - git show -s --pretty=format:%s HEAD > actual && - test_cmp actual expected + git show -s --pretty=format:%s HEAD >actual.tag && + test_cmp expected.branch actual.branch && + test_cmp expected.tag actual.tag ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'merge --no-ff --no-commit && commit' ' git reset --hard c0 && @@ -591,13 +540,13 @@ test_expect_success 'merge --no-ff --no-commit && commit' ' verify_parents $c0 $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_expect_success 'amending no-ff merge commit' ' EDITOR=: git commit --amend && verify_parents $c0 $c1 ' -test_debug 'gitk --all' +test_debug 'git log --graph --decorate --oneline --all' test_done diff --git a/t/t7606-merge-custom.sh b/t/t7606-merge-custom.sh index 52a451dd57..8e8c4d7246 100755 --- a/t/t7606-merge-custom.sh +++ b/t/t7606-merge-custom.sh @@ -1,49 +1,93 @@ #!/bin/sh -test_description='git merge +test_description="git merge 
-Testing a custom strategy.' +Testing a custom strategy. + +* (HEAD, master) Merge commit 'c3' +|\ +| * (tag: c3) c3 +* | (tag: c1) c1 +|/ +| * tag: c2) c2 +|/ +* (tag: c0) c0 +" . ./test-lib.sh -cat >git-merge-theirs <<EOF -#!$SHELL_PATH -eval git read-tree --reset -u \\\$\$# -EOF -chmod +x git-merge-theirs -PATH=.:$PATH -export PATH +test_expect_success 'set up custom strategy' ' + cat >git-merge-theirs <<-EOF && + #!$SHELL_PATH + eval git read-tree --reset -u \\\$\$# + EOF + + chmod +x git-merge-theirs && + PATH=.:$PATH && + export PATH +' test_expect_success 'setup' ' - echo c0 >c0.c && - git add c0.c && - git commit -m c0 && - git tag c0 && - echo c1 >c1.c && - git add c1.c && - git commit -m c1 && - git tag c1 && - git reset --hard c0 && + test_commit c0 c0.c && + test_commit c1 c1.c && + git reset --keep c0 && echo c1c1 >c1.c && - echo c2 >c2.c && - git add c1.c c2.c && - git commit -m c2 && - git tag c2 + git add c1.c && + test_commit c2 c2.c && + git reset --keep c0 && + test_commit c3 c3.c ' test_expect_success 'merge c2 with a custom strategy' ' git reset --hard c1 && + + git rev-parse c1 >head.old && + git rev-parse c2 >second-parent.expected && + git rev-parse c2^{tree} >tree.expected && git merge -s theirs c2 && - test "$(git rev-parse c1)" != "$(git rev-parse HEAD)" && - test "$(git rev-parse c1)" = "$(git rev-parse HEAD^1)" && - test "$(git rev-parse c2)" = "$(git rev-parse HEAD^2)" && - test "$(git rev-parse c2^{tree})" = "$(git rev-parse HEAD^{tree})" && + + git rev-parse HEAD >head.new && + git rev-parse HEAD^1 >first-parent && + git rev-parse HEAD^2 >second-parent && + git rev-parse HEAD^{tree} >tree && + git update-index --refresh && git diff --exit-code && git diff --exit-code c2 HEAD && git diff --exit-code c2 && + + ! 
test_cmp head.old head.new && + test_cmp head.old first-parent && + test_cmp second-parent.expected second-parent && + test_cmp tree.expected tree && test -f c0.c && grep c1c1 c1.c && test -f c2.c ' +test_expect_success 'trivial merge with custom strategy' ' + git reset --hard c1 && + + git rev-parse c1 >head.old && + git rev-parse c3 >second-parent.expected && + git rev-parse c3^{tree} >tree.expected && + git merge -s theirs c3 && + + git rev-parse HEAD >head.new && + git rev-parse HEAD^1 >first-parent && + git rev-parse HEAD^2 >second-parent && + git rev-parse HEAD^{tree} >tree && + git update-index --refresh && + git diff --exit-code && + git diff --exit-code c3 HEAD && + git diff --exit-code c3 && + + ! test_cmp head.old head.new && + test_cmp head.old first-parent && + test_cmp second-parent.expected second-parent && + test_cmp tree.expected tree && + test -f c0.c && + ! test -e c1.c && + test -f c3.c +' + test_done diff --git a/t/t7607-merge-overwrite.sh b/t/t7607-merge-overwrite.sh index 49f4e1599a..d82349a6a8 100755 --- a/t/t7607-merge-overwrite.sh +++ b/t/t7607-merge-overwrite.sh @@ -31,7 +31,7 @@ test_expect_success 'setup' ' test_expect_success 'will not overwrite untracked file' ' git reset --hard c1 && cat important > c2.c && - ! git merge c2 && + test_must_fail git merge c2 && test_cmp important c2.c ' @@ -39,7 +39,7 @@ test_expect_success 'will not overwrite new file' ' git reset --hard c1 && cat important > c2.c && git add c2.c && - ! git merge c2 && + test_must_fail git merge c2 && test_cmp important c2.c ' @@ -48,7 +48,7 @@ test_expect_success 'will not overwrite staged changes' ' cat important > c2.c && git add c2.c && rm c2.c && - ! git merge c2 && + test_must_fail git merge c2 && git checkout c2.c && test_cmp important c2.c ' @@ -58,7 +58,7 @@ test_expect_success 'will not overwrite removed file' ' git rm c1.c && git commit -m "rm c1.c" && cat important > c1.c && - ! 
git merge c1a && + test_must_fail git merge c1a && test_cmp important c1.c ' @@ -68,7 +68,7 @@ test_expect_success 'will not overwrite re-added file' ' git commit -m "rm c1.c" && cat important > c1.c && git add c1.c && - ! git merge c1a && + test_must_fail git merge c1a && test_cmp important c1.c ' @@ -79,7 +79,7 @@ test_expect_success 'will not overwrite removed file with staged changes' ' cat important > c1.c && git add c1.c && rm c1.c && - ! git merge c1a && + test_must_fail git merge c1a && git checkout c1.c && test_cmp important c1.c ' diff --git a/t/t7609-merge-co-error-msgs.sh b/t/t7609-merge-co-error-msgs.sh new file mode 100755 index 0000000000..114d2bd785 --- /dev/null +++ b/t/t7609-merge-co-error-msgs.sh @@ -0,0 +1,133 @@ +#!/bin/sh + +test_description='unpack-trees error messages' + +. ./test-lib.sh + + +test_expect_success 'setup' ' + echo one >one && + git add one && + git commit -a -m First && + + git checkout -b branch && + echo two >two && + echo three >three && + echo four >four && + echo five >five && + git add two three four five && + git commit -m Second && + + git checkout master && + echo other >two && + echo other >three && + echo other >four && + echo other >five +' + +cat >expect <<\EOF +error: The following untracked working tree files would be overwritten by merge: + two + three + four + five +Please move or remove them before you can merge. +EOF + +test_expect_success 'untracked files overwritten by merge (fast and non-fast forward)' ' + test_must_fail git merge branch 2>out && + test_cmp out expect && + git commit --allow-empty -m empty && + ( + GIT_MERGE_VERBOSITY=0 && + export GIT_MERGE_VERBOSITY && + test_must_fail git merge branch 2>out2 + ) && + test_cmp out2 expect && + git reset --hard HEAD^ +' + +cat >expect <<\EOF +error: Your local changes to the following files would be overwritten by merge: + two + three + four +Please, commit your changes or stash them before you can merge. 
+error: The following untracked working tree files would be overwritten by merge: + five +Please move or remove them before you can merge. +EOF + +test_expect_success 'untracked files or local changes ovewritten by merge' ' + git add two && + git add three && + git add four && + test_must_fail git merge branch 2>out && + test_cmp out expect +' + +cat >expect <<\EOF +error: Your local changes to the following files would be overwritten by checkout: + rep/two + rep/one +Please, commit your changes or stash them before you can switch branches. +EOF + +test_expect_success 'cannot switch branches because of local changes' ' + git add five && + mkdir rep && + echo one >rep/one && + echo two >rep/two && + git add rep/one rep/two && + git commit -m Fourth && + git checkout master && + echo uno >rep/one && + echo dos >rep/two && + test_must_fail git checkout branch 2>out && + test_cmp out expect +' + +cat >expect <<\EOF +error: Your local changes to the following files would be overwritten by checkout: + rep/two + rep/one +Please, commit your changes or stash them before you can switch branches. 
+EOF + +test_expect_success 'not uptodate file porcelain checkout error' ' + git add rep/one rep/two && + test_must_fail git checkout branch 2>out && + test_cmp out expect +' + +cat >expect <<\EOF +error: Updating the following directories would lose untracked files in it: + rep2 + rep + +EOF + +test_expect_success 'not_uptodate_dir porcelain checkout error' ' + git init uptodate && + cd uptodate && + mkdir rep && + mkdir rep2 && + touch rep/foo && + touch rep2/foo && + git add rep/foo rep2/foo && + git commit -m init && + git checkout -b branch && + git rm rep -r && + git rm rep2 -r && + >rep && + >rep2 && + git add rep rep2&& + git commit -m "added test as a file" && + git checkout master && + >rep/untracked-file && + >rep2/untracked-file && + test_must_fail git checkout branch 2>out && + test_cmp out ../expect +' + +test_done diff --git a/t/t7610-mergetool.sh b/t/t7610-mergetool.sh index e768c3eb2d..3bd74042ef 100755 --- a/t/t7610-mergetool.sh +++ b/t/t7610-mergetool.sh @@ -14,6 +14,7 @@ Testing basic merge tool invocation' # running mergetool test_expect_success 'setup' ' + git config rerere.enabled true && echo master >file1 && mkdir subdir && echo master sub >subdir/file3 && @@ -67,23 +68,47 @@ test_expect_success 'mergetool crlf' ' ' test_expect_success 'mergetool in subdir' ' - git checkout -b test3 branch1 - cd subdir && ( - test_must_fail git merge master >/dev/null 2>&1 && - ( yes "" | git mergetool file3 >/dev/null 2>&1 ) && - test "$(cat file3)" = "master new sub" ) + git checkout -b test3 branch1 && + ( + cd subdir && + test_must_fail git merge master >/dev/null 2>&1 && + ( yes "" | git mergetool file3 >/dev/null 2>&1 ) && + test "$(cat file3)" = "master new sub" + ) ' -# We can't merge files from parent directories when running mergetool -# from a subdir. Is this a bug? 
-# -#test_expect_failure 'mergetool in subdir' ' -# cd subdir && ( -# ( yes "" | git mergetool ../file1 >/dev/null 2>&1 ) && -# ( yes "" | git mergetool ../file2 >/dev/null 2>&1 ) && -# test "$(cat ../file1)" = "master updated" && -# test "$(cat ../file2)" = "master new" && -# git commit -m "branch1 resolved with mergetool - subdir" ) -#' +test_expect_success 'mergetool on file in parent dir' ' + ( + cd subdir && + ( yes "" | git mergetool ../file1 >/dev/null 2>&1 ) && + ( yes "" | git mergetool ../file2 >/dev/null 2>&1 ) && + test "$(cat ../file1)" = "master updated" && + test "$(cat ../file2)" = "master new" && + git commit -m "branch1 resolved with mergetool - subdir" + ) +' + +test_expect_success 'mergetool skips autoresolved' ' + git checkout -b test4 branch1 && + test_must_fail git merge master && + test -n "$(git ls-files -u)" && + output="$(git mergetool --no-prompt)" && + test "$output" = "No files need merging" && + git reset --hard +' + +test_expect_success 'mergetool merges all from subdir' ' + ( + cd subdir && + git config rerere.enabled false && + test_must_fail git merge master && + git mergetool --no-prompt && + test "$(cat ../file1)" = "master updated" && + test "$(cat ../file2)" = "master new" && + test "$(cat file3)" = "master new sub" && + git add ../file1 ../file2 file3 && + git commit -m "branch2 resolved by mergetool from subdir" + ) +' test_done diff --git a/t/t7800-difftool.sh b/t/t7800-difftool.sh index 196827e7ea..58dc6f6452 100755 --- a/t/t7800-difftool.sh +++ b/t/t7800-difftool.sh @@ -10,11 +10,6 @@ Testing basic diff tool invocation . ./test-lib.sh -if ! 
test_have_prereq PERL; then - skip_all='skipping difftool tests, perl not available' - test_done -fi - LF=' ' @@ -50,7 +45,7 @@ prompt_given() } # Create a file on master and change it on branch -test_expect_success 'setup' ' +test_expect_success PERL 'setup' ' echo master >file && git add file && git commit -m "added file" && @@ -62,7 +57,7 @@ test_expect_success 'setup' ' ' # Configure a custom difftool.<tool>.cmd and use it -test_expect_success 'custom commands' ' +test_expect_success PERL 'custom commands' ' restore_test_defaults && git config difftool.test-tool.cmd "cat \$REMOTE" && @@ -75,13 +70,13 @@ test_expect_success 'custom commands' ' ' # Ensures that git-difftool ignores bogus --tool values -test_expect_success 'difftool ignores bad --tool values' ' +test_expect_success PERL 'difftool ignores bad --tool values' ' diff=$(git difftool --no-prompt --tool=bad-tool branch) test "$?" = 1 && test "$diff" = "" ' -test_expect_success 'difftool honors --gui' ' +test_expect_success PERL 'difftool honors --gui' ' git config merge.tool bogus-tool && git config diff.tool bogus-tool && git config diff.guitool test-tool && @@ -92,7 +87,7 @@ test_expect_success 'difftool honors --gui' ' restore_test_defaults ' -test_expect_success 'difftool --gui works without configured diff.guitool' ' +test_expect_success PERL 'difftool --gui works without configured diff.guitool' ' git config diff.tool test-tool && diff=$(git difftool --no-prompt --gui branch) && @@ -102,7 +97,7 @@ test_expect_success 'difftool --gui works without configured diff.guitool' ' ' # Specify the diff tool using $GIT_DIFF_TOOL -test_expect_success 'GIT_DIFF_TOOL variable' ' +test_expect_success PERL 'GIT_DIFF_TOOL variable' ' git config --unset diff.tool GIT_DIFF_TOOL=test-tool && export GIT_DIFF_TOOL && @@ -115,7 +110,7 @@ test_expect_success 'GIT_DIFF_TOOL variable' ' # Test the $GIT_*_TOOL variables and ensure # that $GIT_DIFF_TOOL always wins unless --tool is specified -test_expect_success 
'GIT_DIFF_TOOL overrides' ' +test_expect_success PERL 'GIT_DIFF_TOOL overrides' ' git config diff.tool bogus-tool && git config merge.tool bogus-tool && @@ -136,7 +131,7 @@ test_expect_success 'GIT_DIFF_TOOL overrides' ' # Test that we don't have to pass --no-prompt to difftool # when $GIT_DIFFTOOL_NO_PROMPT is true -test_expect_success 'GIT_DIFFTOOL_NO_PROMPT variable' ' +test_expect_success PERL 'GIT_DIFFTOOL_NO_PROMPT variable' ' GIT_DIFFTOOL_NO_PROMPT=true && export GIT_DIFFTOOL_NO_PROMPT && @@ -148,7 +143,7 @@ test_expect_success 'GIT_DIFFTOOL_NO_PROMPT variable' ' # git-difftool supports the difftool.prompt variable. # Test that GIT_DIFFTOOL_PROMPT can override difftool.prompt = false -test_expect_success 'GIT_DIFFTOOL_PROMPT variable' ' +test_expect_success PERL 'GIT_DIFFTOOL_PROMPT variable' ' git config difftool.prompt false && GIT_DIFFTOOL_PROMPT=true && export GIT_DIFFTOOL_PROMPT && @@ -160,7 +155,7 @@ test_expect_success 'GIT_DIFFTOOL_PROMPT variable' ' ' # Test that we don't have to pass --no-prompt when difftool.prompt is false -test_expect_success 'difftool.prompt config variable is false' ' +test_expect_success PERL 'difftool.prompt config variable is false' ' git config difftool.prompt false && diff=$(git difftool branch) && @@ -170,7 +165,7 @@ test_expect_success 'difftool.prompt config variable is false' ' ' # Test that we don't have to pass --no-prompt when mergetool.prompt is false -test_expect_success 'difftool merge.prompt = false' ' +test_expect_success PERL 'difftool merge.prompt = false' ' git config --unset difftool.prompt git config mergetool.prompt false && @@ -181,7 +176,7 @@ test_expect_success 'difftool merge.prompt = false' ' ' # Test that the -y flag can override difftool.prompt = true -test_expect_success 'difftool.prompt can overridden with -y' ' +test_expect_success PERL 'difftool.prompt can overridden with -y' ' git config difftool.prompt true && diff=$(git difftool -y branch) && @@ -191,7 +186,7 @@ test_expect_success 
'difftool.prompt can overridden with -y' ' ' # Test that the --prompt flag can override difftool.prompt = false -test_expect_success 'difftool.prompt can overridden with --prompt' ' +test_expect_success PERL 'difftool.prompt can overridden with --prompt' ' git config difftool.prompt false && prompt=$(echo | git difftool --prompt branch | tail -1) && @@ -201,7 +196,7 @@ test_expect_success 'difftool.prompt can overridden with --prompt' ' ' # Test that the last flag passed on the command-line wins -test_expect_success 'difftool last flag wins' ' +test_expect_success PERL 'difftool last flag wins' ' diff=$(git difftool --prompt --no-prompt branch) && test "$diff" = "branch" && @@ -215,7 +210,7 @@ test_expect_success 'difftool last flag wins' ' # git-difftool falls back to git-mergetool config variables # so test that behavior here -test_expect_success 'difftool + mergetool config variables' ' +test_expect_success PERL 'difftool + mergetool config variables' ' remove_config_vars git config merge.tool test-tool && git config mergetool.test-tool.cmd "cat \$LOCAL" && @@ -233,7 +228,7 @@ test_expect_success 'difftool + mergetool config variables' ' restore_test_defaults ' -test_expect_success 'difftool.<tool>.path' ' +test_expect_success PERL 'difftool.<tool>.path' ' git config difftool.tkdiff.path echo && diff=$(git difftool --tool=tkdiff --no-prompt branch) && git config --unset difftool.tkdiff.path && @@ -243,32 +238,32 @@ test_expect_success 'difftool.<tool>.path' ' restore_test_defaults ' -test_expect_success 'difftool --extcmd=cat' ' +test_expect_success PERL 'difftool --extcmd=cat' ' diff=$(git difftool --no-prompt --extcmd=cat branch) && test "$diff" = branch"$LF"master ' -test_expect_success 'difftool --extcmd cat' ' +test_expect_success PERL 'difftool --extcmd cat' ' diff=$(git difftool --no-prompt --extcmd cat branch) && test "$diff" = branch"$LF"master ' -test_expect_success 'difftool -x cat' ' +test_expect_success PERL 'difftool -x cat' ' diff=$(git difftool 
--no-prompt -x cat branch) && test "$diff" = branch"$LF"master ' -test_expect_success 'difftool --extcmd echo arg1' ' +test_expect_success PERL 'difftool --extcmd echo arg1' ' diff=$(git difftool --no-prompt --extcmd sh\ -c\ \"echo\ \$1\" branch) test "$diff" = file ' -test_expect_success 'difftool --extcmd cat arg1' ' +test_expect_success PERL 'difftool --extcmd cat arg1' ' diff=$(git difftool --no-prompt --extcmd sh\ -c\ \"cat\ \$1\" branch) test "$diff" = master ' -test_expect_success 'difftool --extcmd cat arg2' ' +test_expect_success PERL 'difftool --extcmd cat arg2' ' diff=$(git difftool --no-prompt --extcmd sh\ -c\ \"cat\ \$2\" branch) test "$diff" = branch ' diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 8a6322765c..023f225a4b 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -65,7 +65,7 @@ do test_expect_success "grep -w $L (w)" ' : >expected && - ! git grep -n -w -e "^w" >actual && + test_must_fail git grep -n -w -e "^w" >actual && test_cmp expected actual ' diff --git a/t/t9001-send-email.sh b/t/t9001-send-email.sh index 23597cc407..71b3df9b55 100755 --- a/t/t9001-send-email.sh +++ b/t/t9001-send-email.sh @@ -3,20 +3,17 @@ test_description='git send-email' . ./test-lib.sh -if ! 
test_have_prereq PERL; then - skip_all='skipping git send-email tests, perl not available' - test_done -fi +# May be altered later in the test +PREREQ="PERL" -PROG='git send-email' -test_expect_success \ +test_expect_success $PREREQ \ 'prepare reference tree' \ 'echo "1A quick brown fox jumps over the" >file && echo "lazy dog" >>file && git add file && GIT_AUTHOR_NAME="A" git commit -a -m "Initial."' -test_expect_success \ +test_expect_success $PREREQ \ 'Setup helper tool' \ '(echo "#!$SHELL_PATH" echo shift @@ -36,7 +33,7 @@ clean_fake_sendmail() { rm -f commandline* msgtxt* } -test_expect_success 'Extract patches' ' +test_expect_success $PREREQ 'Extract patches' ' patches=`git format-patch -s --cc="One <one@example.com>" --cc=two@example.com -n HEAD^1` ' @@ -57,49 +54,56 @@ test_no_confirm () { # Exit immediately to prevent hang if a no-confirm test fails check_no_confirm () { - test -f no_confirm_okay || { - skip_all='confirm test failed; skipping remaining tests to prevent hanging' - test_done - } + if ! test -f no_confirm_okay + then + say 'confirm test failed; skipping remaining tests to prevent hanging' + PREREQ="$PREREQ,CHECK_NO_CONFIRM" + fi + return 0 } -test_expect_success 'No confirm with --suppress-cc' ' - test_no_confirm --suppress-cc=sob +test_expect_success $PREREQ 'No confirm with --suppress-cc' ' + test_no_confirm --suppress-cc=sob && + check_no_confirm ' -check_no_confirm -test_expect_success 'No confirm with --confirm=never' ' - test_no_confirm --confirm=never + +test_expect_success $PREREQ 'No confirm with --confirm=never' ' + test_no_confirm --confirm=never && + check_no_confirm ' -check_no_confirm # leave sendemail.confirm set to never after this so that none of the # remaining tests prompt unintentionally. 
-test_expect_success 'No confirm with sendemail.confirm=never' ' +test_expect_success $PREREQ 'No confirm with sendemail.confirm=never' ' git config sendemail.confirm never && - test_no_confirm --compose --subject=foo + test_no_confirm --compose --subject=foo && + check_no_confirm ' -check_no_confirm -test_expect_success 'Send patches' ' +test_expect_success $PREREQ 'Send patches' ' git send-email --suppress-cc=sob --from="Example <nobody@example.com>" --to=nobody@example.com --smtp-server="$(pwd)/fake.sendmail" $patches 2>errors ' +test_expect_success $PREREQ 'setup expect' ' cat >expected <<\EOF !nobody@example.com! !author@example.com! !one@example.com! !two@example.com! EOF -test_expect_success \ +' + +test_expect_success $PREREQ \ 'Verify commandline' \ 'test_cmp expected commandline1' -test_expect_success 'Send patches with --envelope-sender' ' +test_expect_success $PREREQ 'Send patches with --envelope-sender' ' clean_fake_sendmail && git send-email --envelope-sender="Patch Contributer <patch@example.com>" --suppress-cc=sob --from="Example <nobody@example.com>" --to=nobody@example.com --smtp-server="$(pwd)/fake.sendmail" $patches 2>errors ' +test_expect_success $PREREQ 'setup expect' ' cat >expected <<\EOF !patch@example.com! !-i! @@ -108,15 +112,18 @@ cat >expected <<\EOF !one@example.com! !two@example.com! EOF -test_expect_success \ +' + +test_expect_success $PREREQ \ 'Verify commandline' \ 'test_cmp expected commandline1' -test_expect_success 'Send patches with --envelope-sender=auto' ' +test_expect_success $PREREQ 'Send patches with --envelope-sender=auto' ' clean_fake_sendmail && git send-email --envelope-sender=auto --suppress-cc=sob --from="Example <nobody@example.com>" --to=nobody@example.com --smtp-server="$(pwd)/fake.sendmail" $patches 2>errors ' +test_expect_success $PREREQ 'setup expect' ' cat >expected <<\EOF !nobody@example.com! !-i! @@ -125,10 +132,13 @@ cat >expected <<\EOF !one@example.com! !two@example.com! 
EOF -test_expect_success \ +' + +test_expect_success $PREREQ \ 'Verify commandline' \ 'test_cmp expected commandline1' +test_expect_success $PREREQ 'setup expect' " cat >expected-show-all-headers <<\EOF 0001-Second.patch (mbox) Adding cc: A <author@example.com> from line 'From: A <author@example.com>' @@ -158,8 +168,9 @@ References: <unique-message-id@example.com> Result: OK EOF +" -test_expect_success 'Show all headers' ' +test_expect_success $PREREQ 'Show all headers' ' git send-email \ --dry-run \ --suppress-cc=sob \ @@ -177,7 +188,7 @@ test_expect_success 'Show all headers' ' test_cmp expected-show-all-headers actual-show-all-headers ' -test_expect_success 'Prompting works' ' +test_expect_success $PREREQ 'Prompting works' ' clean_fake_sendmail && (echo "Example <from@example.com>" echo "to@example.com" @@ -190,7 +201,7 @@ test_expect_success 'Prompting works' ' grep "^To: to@example.com\$" msgtxt1 ' -test_expect_success 'cccmd works' ' +test_expect_success $PREREQ 'cccmd works' ' clean_fake_sendmail && cp $patches cccmd.patch && echo cccmd--cccmd@example.com >>cccmd.patch && @@ -209,10 +220,10 @@ test_expect_success 'cccmd works' ' grep "^ cccmd@example.com" msgtxt1 ' -z8=zzzzzzzz -z64=$z8$z8$z8$z8$z8$z8$z8$z8 -z512=$z64$z64$z64$z64$z64$z64$z64$z64 -test_expect_success 'reject long lines' ' +test_expect_success $PREREQ 'reject long lines' ' + z8=zzzzzzzz && + z64=$z8$z8$z8$z8$z8$z8$z8$z8 && + z512=$z64$z64$z64$z64$z64$z64$z64$z64 && clean_fake_sendmail && cp $patches longline.patch && echo $z512$z512 >>longline.patch && @@ -225,11 +236,11 @@ test_expect_success 'reject long lines' ' grep longline.patch errors ' -test_expect_success 'no patch was sent' ' +test_expect_success $PREREQ 'no patch was sent' ' ! 
test -e commandline1 ' -test_expect_success 'Author From: in message body' ' +test_expect_success $PREREQ 'Author From: in message body' ' clean_fake_sendmail && git send-email \ --from="Example <nobody@example.com>" \ @@ -240,7 +251,7 @@ test_expect_success 'Author From: in message body' ' grep "From: A <author@example.com>" msgbody1 ' -test_expect_success 'Author From: not in message body' ' +test_expect_success $PREREQ 'Author From: not in message body' ' clean_fake_sendmail && git send-email \ --from="A <author@example.com>" \ @@ -251,7 +262,7 @@ test_expect_success 'Author From: not in message body' ' ! grep "From: A <author@example.com>" msgbody1 ' -test_expect_success 'allow long lines with --no-validate' ' +test_expect_success $PREREQ 'allow long lines with --no-validate' ' git send-email \ --from="Example <nobody@example.com>" \ --to=nobody@example.com \ @@ -261,7 +272,7 @@ test_expect_success 'allow long lines with --no-validate' ' 2>errors ' -test_expect_success 'Invalid In-Reply-To' ' +test_expect_success $PREREQ 'Invalid In-Reply-To' ' clean_fake_sendmail && git send-email \ --from="Example <nobody@example.com>" \ @@ -273,7 +284,7 @@ test_expect_success 'Invalid In-Reply-To' ' ! grep "^In-Reply-To: < *>" msgtxt1 ' -test_expect_success 'Valid In-Reply-To when prompting' ' +test_expect_success $PREREQ 'Valid In-Reply-To when prompting' ' clean_fake_sendmail && (echo "From Example <from@example.com>" echo "To Example <to@example.com>" @@ -284,7 +295,7 @@ test_expect_success 'Valid In-Reply-To when prompting' ' ! 
grep "^In-Reply-To: < *>" msgtxt1 ' -test_expect_success 'setup fake editor' ' +test_expect_success $PREREQ 'setup fake editor' ' (echo "#!$SHELL_PATH" && echo "echo fake edit >>\"\$1\"" ) >fake-editor && @@ -293,7 +304,7 @@ test_expect_success 'setup fake editor' ' test_set_editor "$(pwd)/fake-editor" -test_expect_success '--compose works' ' +test_expect_success $PREREQ '--compose works' ' clean_fake_sendmail && git send-email \ --compose --subject foo \ @@ -304,14 +315,15 @@ test_expect_success '--compose works' ' 2>errors ' -test_expect_success 'first message is compose text' ' +test_expect_success $PREREQ 'first message is compose text' ' grep "^fake edit" msgtxt1 ' -test_expect_success 'second message is patch' ' +test_expect_success $PREREQ 'second message is patch' ' grep "Subject:.*Second" msgtxt2 ' +test_expect_success $PREREQ 'setup expect' " cat >expected-suppress-sob <<\EOF 0001-Second.patch (mbox) Adding cc: A <author@example.com> from line 'From: A <author@example.com>' @@ -338,6 +350,7 @@ X-Mailer: X-MAILER-STRING Result: OK EOF +" test_suppression () { git send-email \ @@ -354,11 +367,12 @@ test_suppression () { test_cmp expected-suppress-$1${2+"-$2"} actual-suppress-$1${2+"-$2"} } -test_expect_success 'sendemail.cc set' ' +test_expect_success $PREREQ 'sendemail.cc set' ' git config sendemail.cc cc@example.com && test_suppression sob ' +test_expect_success $PREREQ 'setup expect' " cat >expected-suppress-sob <<\EOF 0001-Second.patch (mbox) Adding cc: A <author@example.com> from line 'From: A <author@example.com>' @@ -383,12 +397,14 @@ X-Mailer: X-MAILER-STRING Result: OK EOF +" -test_expect_success 'sendemail.cc unset' ' +test_expect_success $PREREQ 'sendemail.cc unset' ' git config --unset sendemail.cc && test_suppression sob ' +test_expect_success $PREREQ 'setup expect' " cat >expected-suppress-cccmd <<\EOF 0001-Second.patch (mbox) Adding cc: A <author@example.com> from line 'From: A <author@example.com>' @@ -416,14 +432,16 @@ X-Mailer: 
X-MAILER-STRING Result: OK EOF +" -test_expect_success 'sendemail.cccmd' ' +test_expect_success $PREREQ 'sendemail.cccmd' ' echo echo cc-cmd@example.com > cccmd && chmod +x cccmd && git config sendemail.cccmd ./cccmd && test_suppression cccmd ' +test_expect_success $PREREQ 'setup expect' ' cat >expected-suppress-all <<\EOF 0001-Second.patch Dry-OK. Log says: @@ -439,11 +457,13 @@ X-Mailer: X-MAILER-STRING Result: OK EOF +' -test_expect_success '--suppress-cc=all' ' +test_expect_success $PREREQ '--suppress-cc=all' ' test_suppression all ' +test_expect_success $PREREQ 'setup expect' " cat >expected-suppress-body <<\EOF 0001-Second.patch (mbox) Adding cc: A <author@example.com> from line 'From: A <author@example.com>' @@ -471,11 +491,13 @@ X-Mailer: X-MAILER-STRING Result: OK EOF +" -test_expect_success '--suppress-cc=body' ' +test_expect_success $PREREQ '--suppress-cc=body' ' test_suppression body ' +test_expect_success $PREREQ 'setup expect' " cat >expected-suppress-body-cccmd <<\EOF 0001-Second.patch (mbox) Adding cc: A <author@example.com> from line 'From: A <author@example.com>' @@ -500,11 +522,13 @@ X-Mailer: X-MAILER-STRING Result: OK EOF +" -test_expect_success '--suppress-cc=body --suppress-cc=cccmd' ' +test_expect_success $PREREQ '--suppress-cc=body --suppress-cc=cccmd' ' test_suppression body cccmd ' +test_expect_success $PREREQ 'setup expect' " cat >expected-suppress-sob <<\EOF 0001-Second.patch (mbox) Adding cc: A <author@example.com> from line 'From: A <author@example.com>' @@ -529,12 +553,14 @@ X-Mailer: X-MAILER-STRING Result: OK EOF +" -test_expect_success '--suppress-cc=sob' ' +test_expect_success $PREREQ '--suppress-cc=sob' ' git config --unset sendemail.cccmd test_suppression sob ' +test_expect_success $PREREQ 'setup expect' " cat >expected-suppress-bodycc <<\EOF 0001-Second.patch (mbox) Adding cc: A <author@example.com> from line 'From: A <author@example.com>' @@ -562,11 +588,13 @@ X-Mailer: X-MAILER-STRING Result: OK EOF +" -test_expect_success 
'--suppress-cc=bodycc' ' +test_expect_success $PREREQ '--suppress-cc=bodycc' ' test_suppression bodycc ' +test_expect_success $PREREQ 'setup expect' " cat >expected-suppress-cc <<\EOF 0001-Second.patch (mbox) Adding cc: A <author@example.com> from line 'From: A <author@example.com>' @@ -588,8 +616,9 @@ X-Mailer: X-MAILER-STRING Result: OK EOF +" -test_expect_success '--suppress-cc=cc' ' +test_expect_success $PREREQ '--suppress-cc=cc' ' test_suppression cc ' @@ -604,23 +633,23 @@ test_confirm () { grep "Send this email" stdout } -test_expect_success '--confirm=always' ' +test_expect_success $PREREQ '--confirm=always' ' test_confirm --confirm=always --suppress-cc=all ' -test_expect_success '--confirm=auto' ' +test_expect_success $PREREQ '--confirm=auto' ' test_confirm --confirm=auto ' -test_expect_success '--confirm=cc' ' +test_expect_success $PREREQ '--confirm=cc' ' test_confirm --confirm=cc ' -test_expect_success '--confirm=compose' ' +test_expect_success $PREREQ '--confirm=compose' ' test_confirm --confirm=compose --compose ' -test_expect_success 'confirm by default (due to cc)' ' +test_expect_success $PREREQ 'confirm by default (due to cc)' ' CONFIRM=$(git config --get sendemail.confirm) && git config --unset sendemail.confirm && test_confirm @@ -629,7 +658,7 @@ test_expect_success 'confirm by default (due to cc)' ' test $ret = "0" ' -test_expect_success 'confirm by default (due to --compose)' ' +test_expect_success $PREREQ 'confirm by default (due to --compose)' ' CONFIRM=$(git config --get sendemail.confirm) && git config --unset sendemail.confirm && test_confirm --suppress-cc=all --compose @@ -638,7 +667,7 @@ test_expect_success 'confirm by default (due to --compose)' ' test $ret = "0" ' -test_expect_success 'confirm detects EOF (inform assumes y)' ' +test_expect_success $PREREQ 'confirm detects EOF (inform assumes y)' ' CONFIRM=$(git config --get sendemail.confirm) && git config --unset sendemail.confirm && rm -fr outdir && @@ -654,7 +683,7 @@ 
test_expect_success 'confirm detects EOF (inform assumes y)' ' test $ret = "0" ' -test_expect_success 'confirm detects EOF (auto causes failure)' ' +test_expect_success $PREREQ 'confirm detects EOF (auto causes failure)' ' CONFIRM=$(git config --get sendemail.confirm) && git config sendemail.confirm auto && GIT_SEND_EMAIL_NOTTY=1 && @@ -669,7 +698,7 @@ test_expect_success 'confirm detects EOF (auto causes failure)' ' test $ret = "0" ' -test_expect_success 'confirm doesnt loop forever' ' +test_expect_success $PREREQ 'confirm doesnt loop forever' ' CONFIRM=$(git config --get sendemail.confirm) && git config sendemail.confirm auto && GIT_SEND_EMAIL_NOTTY=1 && @@ -684,7 +713,7 @@ test_expect_success 'confirm doesnt loop forever' ' test $ret = "0" ' -test_expect_success 'utf8 Cc is rfc2047 encoded' ' +test_expect_success $PREREQ 'utf8 Cc is rfc2047 encoded' ' clean_fake_sendmail && rm -fr outdir && git format-patch -1 -o outdir --cc="àéìöú <utf8@example.com>" && @@ -697,7 +726,7 @@ test_expect_success 'utf8 Cc is rfc2047 encoded' ' grep "=?UTF-8?q?=C3=A0=C3=A9=C3=AC=C3=B6=C3=BA?= <utf8@example.com>" ' -test_expect_success '--compose adds MIME for utf8 body' ' +test_expect_success $PREREQ '--compose adds MIME for utf8 body' ' clean_fake_sendmail && (echo "#!$SHELL_PATH" && echo "echo utf8 body: àéìöú >>\"\$1\"" @@ -714,7 +743,7 @@ test_expect_success '--compose adds MIME for utf8 body' ' grep "^Content-Type: text/plain; charset=UTF-8" msgtxt1 ' -test_expect_success '--compose respects user mime type' ' +test_expect_success $PREREQ '--compose respects user mime type' ' clean_fake_sendmail && (echo "#!$SHELL_PATH" && echo "(echo MIME-Version: 1.0" @@ -737,7 +766,7 @@ test_expect_success '--compose respects user mime type' ' ! 
grep "^Content-Type: text/plain; charset=UTF-8" msgtxt1 ' -test_expect_success '--compose adds MIME for utf8 subject' ' +test_expect_success $PREREQ '--compose adds MIME for utf8 subject' ' clean_fake_sendmail && GIT_EDITOR="\"$(pwd)/fake-editor\"" \ git send-email \ @@ -750,7 +779,7 @@ test_expect_success '--compose adds MIME for utf8 subject' ' grep "^Subject: =?UTF-8?q?utf8-s=C3=BCbj=C3=ABct?=" msgtxt1 ' -test_expect_success 'detects ambiguous reference/file conflict' ' +test_expect_success $PREREQ 'detects ambiguous reference/file conflict' ' echo master > master && git add master && git commit -m"add master" && @@ -758,7 +787,7 @@ test_expect_success 'detects ambiguous reference/file conflict' ' grep disambiguate errors ' -test_expect_success 'feed two files' ' +test_expect_success $PREREQ 'feed two files' ' rm -fr outdir && git format-patch -2 -o outdir && git send-email \ @@ -771,7 +800,7 @@ test_expect_success 'feed two files' ' test "z$(sed -n -e 2p subjects)" = "zSubject: [PATCH 2/2] add master" ' -test_expect_success 'in-reply-to but no threading' ' +test_expect_success $PREREQ 'in-reply-to but no threading' ' git send-email \ --dry-run \ --from="Example <nobody@example.com>" \ @@ -782,7 +811,7 @@ test_expect_success 'in-reply-to but no threading' ' grep "In-Reply-To: <in-reply-id@example.com>" ' -test_expect_success 'no in-reply-to and no threading' ' +test_expect_success $PREREQ 'no in-reply-to and no threading' ' git send-email \ --dry-run \ --from="Example <nobody@example.com>" \ @@ -792,7 +821,7 @@ test_expect_success 'no in-reply-to and no threading' ' ! 
grep "In-Reply-To: " stdout ' -test_expect_success 'threading but no chain-reply-to' ' +test_expect_success $PREREQ 'threading but no chain-reply-to' ' git send-email \ --dry-run \ --from="Example <nobody@example.com>" \ @@ -803,7 +832,7 @@ test_expect_success 'threading but no chain-reply-to' ' grep "In-Reply-To: " stdout ' -test_expect_success 'warning with an implicit --chain-reply-to' ' +test_expect_success $PREREQ 'warning with an implicit --chain-reply-to' ' git send-email \ --dry-run \ --from="Example <nobody@example.com>" \ @@ -812,7 +841,7 @@ test_expect_success 'warning with an implicit --chain-reply-to' ' grep "no-chain-reply-to" errors ' -test_expect_success 'no warning with an explicit --chain-reply-to' ' +test_expect_success $PREREQ 'no warning with an explicit --chain-reply-to' ' git send-email \ --dry-run \ --from="Example <nobody@example.com>" \ @@ -822,7 +851,7 @@ test_expect_success 'no warning with an explicit --chain-reply-to' ' ! grep "no-chain-reply-to" errors ' -test_expect_success 'no warning with an explicit --no-chain-reply-to' ' +test_expect_success $PREREQ 'no warning with an explicit --no-chain-reply-to' ' git send-email \ --dry-run \ --from="Example <nobody@example.com>" \ @@ -832,7 +861,7 @@ test_expect_success 'no warning with an explicit --no-chain-reply-to' ' ! grep "no-chain-reply-to" errors ' -test_expect_success 'no warning with sendemail.chainreplyto = false' ' +test_expect_success $PREREQ 'no warning with sendemail.chainreplyto = false' ' git config sendemail.chainreplyto false && git send-email \ --dry-run \ @@ -842,7 +871,7 @@ test_expect_success 'no warning with sendemail.chainreplyto = false' ' ! 
grep "no-chain-reply-to" errors ' -test_expect_success 'no warning with sendemail.chainreplyto = true' ' +test_expect_success $PREREQ 'no warning with sendemail.chainreplyto = true' ' git config sendemail.chainreplyto true && git send-email \ --dry-run \ @@ -852,7 +881,7 @@ test_expect_success 'no warning with sendemail.chainreplyto = true' ' ! grep "no-chain-reply-to" errors ' -test_expect_success 'sendemail.to works' ' +test_expect_success $PREREQ 'sendemail.to works' ' git config --replace-all sendemail.to "Somebody <somebody@ex.com>" && git send-email \ --dry-run \ @@ -861,7 +890,7 @@ test_expect_success 'sendemail.to works' ' grep "To: Somebody <somebody@ex.com>" stdout ' -test_expect_success '--no-to overrides sendemail.to' ' +test_expect_success $PREREQ '--no-to overrides sendemail.to' ' git send-email \ --dry-run \ --from="Example <nobody@example.com>" \ @@ -872,7 +901,7 @@ test_expect_success '--no-to overrides sendemail.to' ' ! grep "To: Somebody <somebody@ex.com>" stdout ' -test_expect_success 'sendemail.cc works' ' +test_expect_success $PREREQ 'sendemail.cc works' ' git config --replace-all sendemail.cc "Somebody <somebody@ex.com>" && git send-email \ --dry-run \ @@ -882,7 +911,7 @@ test_expect_success 'sendemail.cc works' ' grep "Cc: Somebody <somebody@ex.com>" stdout ' -test_expect_success '--no-cc overrides sendemail.cc' ' +test_expect_success $PREREQ '--no-cc overrides sendemail.cc' ' git send-email \ --dry-run \ --from="Example <nobody@example.com>" \ @@ -894,7 +923,7 @@ test_expect_success '--no-cc overrides sendemail.cc' ' ! 
grep "Cc: Somebody <somebody@ex.com>" stdout ' -test_expect_success 'sendemail.bcc works' ' +test_expect_success $PREREQ 'sendemail.bcc works' ' git config --replace-all sendemail.bcc "Other <other@ex.com>" && git send-email \ --dry-run \ @@ -905,7 +934,7 @@ test_expect_success 'sendemail.bcc works' ' grep "RCPT TO:<other@ex.com>" stdout ' -test_expect_success '--no-bcc overrides sendemail.bcc' ' +test_expect_success $PREREQ '--no-bcc overrides sendemail.bcc' ' git send-email \ --dry-run \ --from="Example <nobody@example.com>" \ @@ -918,6 +947,7 @@ test_expect_success '--no-bcc overrides sendemail.bcc' ' ! grep "RCPT TO:<other@ex.com>" stdout ' +test_expect_success $PREREQ 'setup expect' ' cat >email-using-8bit <<EOF From fe6ecc66ece37198fe5db91fa2fc41d9f4fe5cc4 Mon Sep 17 00:00:00 2001 Message-Id: <bogus-message-id@example.com> @@ -927,14 +957,17 @@ Subject: subject goes here Dieser deutsche Text enthält einen Umlaut! EOF +' +test_expect_success $PREREQ 'setup expect' ' cat >content-type-decl <<EOF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EOF +' -test_expect_success 'asks about and fixes 8bit encodings' ' +test_expect_success $PREREQ 'asks about and fixes 8bit encodings' ' clean_fake_sendmail && echo | git send-email --from=author@example.com --to=nobody@example.com \ @@ -947,7 +980,7 @@ test_expect_success 'asks about and fixes 8bit encodings' ' test_cmp actual content-type-decl ' -test_expect_success 'sendemail.8bitEncoding works' ' +test_expect_success $PREREQ 'sendemail.8bitEncoding works' ' clean_fake_sendmail && git config sendemail.assume8bitEncoding UTF-8 && echo bogus | @@ -958,7 +991,7 @@ test_expect_success 'sendemail.8bitEncoding works' ' test_cmp actual content-type-decl ' -test_expect_success '--8bit-encoding overrides sendemail.8bitEncoding' ' +test_expect_success $PREREQ '--8bit-encoding overrides sendemail.8bitEncoding' ' clean_fake_sendmail && git config sendemail.assume8bitEncoding "bogus too" && 
echo bogus | @@ -970,6 +1003,7 @@ test_expect_success '--8bit-encoding overrides sendemail.8bitEncoding' ' test_cmp actual content-type-decl ' +test_expect_success $PREREQ 'setup expect' ' cat >email-using-8bit <<EOF From fe6ecc66ece37198fe5db91fa2fc41d9f4fe5cc4 Mon Sep 17 00:00:00 2001 Message-Id: <bogus-message-id@example.com> @@ -979,12 +1013,15 @@ Subject: Dieser Betreff enthält auch einen Umlaut! Nothing to see here. EOF +' +test_expect_success $PREREQ 'setup expect' ' cat >expected <<EOF Subject: =?UTF-8?q?Dieser=20Betreff=20enth=C3=A4lt=20auch=20einen=20Umlaut!?= EOF +' -test_expect_success '--8bit-encoding also treats subject' ' +test_expect_success $PREREQ '--8bit-encoding also treats subject' ' clean_fake_sendmail && echo bogus | git send-email --from=author@example.com --to=nobody@example.com \ diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh new file mode 100755 index 0000000000..a713dfc50b --- /dev/null +++ b/t/t9010-svn-fe.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +test_description='check svn dumpfile importer' + +. ./lib-git-svn.sh + +test_dump() { + label=$1 + dump=$2 + test_expect_success "$dump" ' + svnadmin create "$label-svn" && + svnadmin load "$label-svn" < "$TEST_DIRECTORY/$dump" && + svn_cmd export "file://$PWD/$label-svn" "$label-svnco" && + git init "$label-git" && + test-svn-fe "$TEST_DIRECTORY/$dump" >"$label.fe" && + ( + cd "$label-git" && + git fast-import < ../"$label.fe" + ) && + ( + cd "$label-svnco" && + git init && + git add . 
&& + git fetch "../$label-git" master && + git diff --exit-code FETCH_HEAD + ) + ' +} + +test_dump simple t9135/svn.dump + +test_done diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh index 13766ab160..d5adae640b 100755 --- a/t/t9100-git-svn-basic.sh +++ b/t/t9100-git-svn-basic.sh @@ -271,6 +271,17 @@ test_expect_success 'able to dcommit to a subdirectory' " test -z \"\`git diff refs/heads/my-bar refs/remotes/bar\`\" " +test_expect_success 'dcommit should not fail with a touched file' ' + test_commit "commit-new-file-foo2" foo2 && + test-chmtime =-60 foo && + git svn dcommit +' + +test_expect_success 'rebase should not fail with a touched file' ' + test-chmtime =-60 foo && + git svn rebase +' + test_expect_success 'able to set-tree to a subdirectory' " echo cba > d && git update-index d && diff --git a/t/t9130-git-svn-authors-file.sh b/t/t9130-git-svn-authors-file.sh index 134411e0a5..ec0a106614 100755 --- a/t/t9130-git-svn-authors-file.sh +++ b/t/t9130-git-svn-authors-file.sh @@ -20,7 +20,7 @@ test_expect_success 'setup svnrepo' ' ' test_expect_success 'start import with incomplete authors file' ' - ! git svn clone --authors-file=svn-authors "$svnrepo" x + test_must_fail git svn clone --authors-file=svn-authors "$svnrepo" x ' test_expect_success 'imported 2 revisions successfully' ' @@ -63,7 +63,7 @@ test_expect_success 'authors-file against globs' ' ' test_expect_success 'fetch fails on ee' ' - ( cd aa-work && ! 
git svn fetch --authors-file=../svn-authors ) + ( cd aa-work && test_must_fail git svn fetch --authors-file=../svn-authors ) ' tmp_config_get () { @@ -95,8 +95,6 @@ test_expect_success 'fresh clone with svn.authors-file in config' ' ( rm -r "$GIT_DIR" && test x = x"$(git config svn.authorsfile)" && - HOME="`pwd`" && - export HOME && test_config="$HOME"/.gitconfig && unset GIT_CONFIG_NOGLOBAL && unset GIT_DIR && diff --git a/t/t9139-git-svn-non-utf8-commitencoding.sh b/t/t9139-git-svn-non-utf8-commitencoding.sh index f337959ccc..22d80b0be2 100755 --- a/t/t9139-git-svn-non-utf8-commitencoding.sh +++ b/t/t9139-git-svn-non-utf8-commitencoding.sh @@ -39,7 +39,7 @@ do ( cd $H && git config --unset i18n.commitencoding && - ! git svn dcommit + test_must_fail git svn dcommit ) ' done diff --git a/t/t9140-git-svn-reset.sh b/t/t9140-git-svn-reset.sh index 0735526d4b..e855904629 100755 --- a/t/t9140-git-svn-reset.sh +++ b/t/t9140-git-svn-reset.sh @@ -41,7 +41,7 @@ test_expect_success 'modify hidden file in SVN repo' ' test_expect_success 'fetch fails on modified hidden file' ' ( cd g && git svn find-rev refs/remotes/git-svn > ../expect && - ! git svn fetch 2> ../errors && + test_must_fail git svn fetch 2> ../errors && git svn find-rev refs/remotes/git-svn > ../expect2 ) && fgrep "not found in commit" errors && test_cmp expect expect2 diff --git a/t/t9155-git-svn-fetch-deleted-tag.sh b/t/t9155-git-svn-fetch-deleted-tag.sh new file mode 100755 index 0000000000..a486a98f84 --- /dev/null +++ b/t/t9155-git-svn-fetch-deleted-tag.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +test_description='git svn fetch deleted tag' + +. 
./lib-git-svn.sh + +test_expect_success 'setup svn repo' ' + mkdir -p import/trunk/subdir && + mkdir -p import/branches && + mkdir -p import/tags && + echo "base" >import/trunk/subdir/file && + svn_cmd import -m "import for git svn" import "$svnrepo" && + rm -rf import && + + svn_cmd mkdir -m "create mybranch directory" "$svnrepo/branches/mybranch" && + svn_cmd cp -m "create branch mybranch" "$svnrepo/trunk" "$svnrepo/branches/mybranch/trunk" && + + svn_cmd co "$svnrepo/trunk" svn_project && + (cd svn_project && + echo "trunk change" >>subdir/file && + svn_cmd ci -m "trunk change" subdir/file && + + svn_cmd switch "$svnrepo/branches/mybranch/trunk" && + echo "branch change" >>subdir/file && + svn_cmd ci -m "branch change" subdir/file + ) && + + svn_cmd cp -m "create mytag attempt 1" -r5 "$svnrepo/trunk/subdir" "$svnrepo/tags/mytag" && + svn_cmd rm -m "delete mytag attempt 1" "$svnrepo/tags/mytag" && + svn_cmd cp -m "create mytag attempt 2" -r5 "$svnrepo/branches/mybranch/trunk/subdir" "$svnrepo/tags/mytag" +' + +test_expect_success 'fetch deleted tags from same revision with checksum error' ' + git svn init --stdlayout "$svnrepo" git_project && + cd git_project && + git svn fetch && + + git diff --exit-code mybranch:trunk/subdir/file tags/mytag:file && + git diff --exit-code master:subdir/file tags/mytag^:file +' + +test_done diff --git a/t/t9156-git-svn-fetch-deleted-tag-2.sh b/t/t9156-git-svn-fetch-deleted-tag-2.sh new file mode 100755 index 0000000000..5ce7e2f3b0 --- /dev/null +++ b/t/t9156-git-svn-fetch-deleted-tag-2.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +test_description='git svn fetch deleted tag 2' + +. 
./lib-git-svn.sh + +test_expect_success 'setup svn repo' ' + mkdir -p import/branches && + mkdir -p import/tags && + mkdir -p import/trunk/subdir1 && + mkdir -p import/trunk/subdir2 && + mkdir -p import/trunk/subdir3 && + echo "file1" >import/trunk/subdir1/file && + echo "file2" >import/trunk/subdir2/file && + echo "file3" >import/trunk/subdir3/file && + svn_cmd import -m "import for git svn" import "$svnrepo" && + rm -rf import && + + svn_cmd co "$svnrepo/trunk" svn_project && + (cd svn_project && + echo "change1" >>subdir1/file && + echo "change2" >>subdir2/file && + echo "change3" >>subdir3/file && + svn_cmd ci -m "change" . + ) && + + svn_cmd cp -m "create mytag 1" -r2 "$svnrepo/trunk/subdir1" "$svnrepo/tags/mytag" && + svn_cmd rm -m "delete mytag 1" "$svnrepo/tags/mytag" && + svn_cmd cp -m "create mytag 2" -r2 "$svnrepo/trunk/subdir2" "$svnrepo/tags/mytag" && + svn_cmd rm -m "delete mytag 2" "$svnrepo/tags/mytag" && + svn_cmd cp -m "create mytag 3" -r2 "$svnrepo/trunk/subdir3" "$svnrepo/tags/mytag" +' + +test_expect_success 'fetch deleted tags from same revision with no checksum error' ' + git svn init --stdlayout "$svnrepo" git_project && + cd git_project && + git svn fetch && + + git diff --exit-code master:subdir3/file tags/mytag:file && + git diff --exit-code master:subdir2/file tags/mytag^:file && + git diff --exit-code master:subdir1/file tags/mytag^^:file +' + +test_done diff --git a/t/t9200-git-cvsexportcommit.sh b/t/t9200-git-cvsexportcommit.sh index ee39b36d78..e5da65b99f 100755 --- a/t/t9200-git-cvsexportcommit.sh +++ b/t/t9200-git-cvsexportcommit.sh @@ -5,6 +5,7 @@ test_description='Test export of commits to CVS' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-prereq-FILEMODE.sh if ! test_have_prereq PERL; then skip_all='skipping git cvsexportcommit tests, perl not available' @@ -229,11 +230,6 @@ test_expect_success \ test_must_fail git cvsexportcommit -c $id )' -if ! 
test "$(git config --bool core.filemode)" = false -then - test_set_prereq FILEMODE -fi - test_expect_success FILEMODE \ 'Retain execute bit' \ 'mkdir G && diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 2aeed7bd06..96d07f1833 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -853,6 +853,60 @@ test_expect_success \ 'git fast-import <input && test `git rev-parse N2^{tree}` = `git rev-parse N3^{tree}`' +test_expect_success \ + 'N: copy directory by id' \ + 'cat >expect <<-\EOF && + :100755 100755 f1fb5da718392694d0076d677d6d0e364c79b0bc f1fb5da718392694d0076d677d6d0e364c79b0bc C100 file2/newf file3/newf + :100644 100644 7123f7f44e39be127c5eb701e5968176ee9d78b1 7123f7f44e39be127c5eb701e5968176ee9d78b1 C100 file2/oldf file3/oldf + EOF + subdir=$(git rev-parse refs/heads/branch^0:file2) && + cat >input <<-INPUT_END && + commit refs/heads/N4 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + copy by tree hash + COMMIT + + from refs/heads/branch^0 + M 040000 $subdir file3 + INPUT_END + git fast-import <input && + git diff-tree -C --find-copies-harder -r N4^ N4 >actual && + compare_diff_raw expect actual' + +test_expect_success \ + 'N: modify copied tree' \ + 'cat >expect <<-\EOF && + :100644 100644 fcf778cda181eaa1cbc9e9ce3a2e15ee9f9fe791 fcf778cda181eaa1cbc9e9ce3a2e15ee9f9fe791 C100 newdir/interesting file3/file5 + :100755 100755 f1fb5da718392694d0076d677d6d0e364c79b0bc f1fb5da718392694d0076d677d6d0e364c79b0bc C100 file2/newf file3/newf + :100644 100644 7123f7f44e39be127c5eb701e5968176ee9d78b1 7123f7f44e39be127c5eb701e5968176ee9d78b1 C100 file2/oldf file3/oldf + EOF + subdir=$(git rev-parse refs/heads/branch^0:file2) && + cat >input <<-INPUT_END && + commit refs/heads/N5 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + copy by tree hash + COMMIT + + from refs/heads/branch^0 + M 040000 $subdir file3 + + commit refs/heads/N5 + committer 
$GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + modify directory copy + COMMIT + + M 644 inline file3/file5 + data <<EOF + $file5_data + EOF + INPUT_END + git fast-import <input && + git diff-tree -C --find-copies-harder -r N5^^ N5 >actual && + compare_diff_raw expect actual' + ### ### series O ### diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh index d43f37ccaf..8c8e679468 100755 --- a/t/t9350-fast-export.sh +++ b/t/t9350-fast-export.sh @@ -355,6 +355,20 @@ test_expect_failure 'no exact-ref revisions included' ' ) ' +test_expect_success 'path limiting with import-marks does not lose unmodified files' ' + git checkout -b simple marks~2 && + git fast-export --export-marks=marks simple -- file > /dev/null && + echo more content >> file && + test_tick && + git commit -mnext file && + git fast-export --import-marks=marks simple -- file file0 | grep file0 +' + +test_expect_success 'full-tree re-shows unmodified files' ' + git checkout -f simple && + test $(git fast-export --full-tree simple | grep -c file0) -eq 3 +' + test_expect_success 'set-up a few more tags for tag export tests' ' git checkout -f master && HEAD_TREE=`git show -s --pretty=raw HEAD | grep tree | sed "s/tree //"` && @@ -376,4 +390,28 @@ test_expect_success 'tree_tag-obj' 'git fast-export tree_tag-obj' test_expect_success 'tag-obj_tag' 'git fast-export tag-obj_tag' test_expect_success 'tag-obj_tag-obj' 'git fast-export tag-obj_tag-obj' +test_expect_success SYMLINKS 'directory becomes symlink' ' + git init dirtosymlink && + git init result && + ( + cd dirtosymlink && + mkdir foo && + mkdir bar && + echo hello > foo/world && + echo hello > bar/world && + git add foo/world bar/world && + git commit -q -mone && + git rm -r foo && + ln -s bar foo && + git add foo && + git commit -q -mtwo + ) && + ( + cd dirtosymlink && + git fast-export master -- foo | + (cd ../result && git fast-import --quiet) + ) && + (cd result && git show master:foo) +' + test_done diff 
--git a/t/t9600-cvsimport.sh b/t/t9600-cvsimport.sh index 2eff9cd68c..559ce41795 100755 --- a/t/t9600-cvsimport.sh +++ b/t/t9600-cvsimport.sh @@ -3,17 +3,14 @@ test_description='git cvsimport basic tests' . ./lib-cvs.sh -if ! test_have_prereq PERL; then - skip_all='skipping git cvsimport tests, perl not available' - test_done -fi - -CVSROOT=$(pwd)/cvsroot -export CVSROOT +test_expect_success PERL 'setup cvsroot environment' ' + CVSROOT=$(pwd)/cvsroot && + export CVSROOT +' -test_expect_success 'setup cvsroot' '$CVS init' +test_expect_success PERL 'setup cvsroot' '$CVS init' -test_expect_success 'setup a cvs module' ' +test_expect_success PERL 'setup a cvs module' ' mkdir "$CVSROOT/module" && $CVS co -d module-cvs module && @@ -45,23 +42,23 @@ EOF cd .. ' -test_expect_success 'import a trivial module' ' +test_expect_success PERL 'import a trivial module' ' git cvsimport -a -R -z 0 -C module-git module && test_cmp module-cvs/o_fortuna module-git/o_fortuna ' -test_expect_success 'pack refs' 'cd module-git && git gc && cd ..' +test_expect_success PERL 'pack refs' 'cd module-git && git gc && cd ..' -test_expect_success 'initial import has correct .git/cvs-revisions' ' +test_expect_success PERL 'initial import has correct .git/cvs-revisions' ' (cd module-git && git log --format="o_fortuna 1.1 %H" -1) > expected && test_cmp expected module-git/.git/cvs-revisions ' -test_expect_success 'update cvs module' ' +test_expect_success PERL 'update cvs module' ' cd module-cvs && cat <<EOF >o_fortuna && @@ -90,7 +87,7 @@ EOF cd .. 
' -test_expect_success 'update git module' ' +test_expect_success PERL 'update git module' ' cd module-git && git cvsimport -a -R -z 0 module && @@ -100,7 +97,7 @@ test_expect_success 'update git module' ' ' -test_expect_success 'update has correct .git/cvs-revisions' ' +test_expect_success PERL 'update has correct .git/cvs-revisions' ' (cd module-git && git log --format="o_fortuna 1.1 %H" -1 HEAD^ && @@ -108,7 +105,7 @@ test_expect_success 'update has correct .git/cvs-revisions' ' test_cmp expected module-git/.git/cvs-revisions ' -test_expect_success 'update cvs module' ' +test_expect_success PERL 'update cvs module' ' cd module-cvs && echo 1 >tick && @@ -118,7 +115,7 @@ test_expect_success 'update cvs module' ' ' -test_expect_success 'cvsimport.module config works' ' +test_expect_success PERL 'cvsimport.module config works' ' cd module-git && git config cvsimport.module module && @@ -129,7 +126,7 @@ test_expect_success 'cvsimport.module config works' ' ' -test_expect_success 'second update has correct .git/cvs-revisions' ' +test_expect_success PERL 'second update has correct .git/cvs-revisions' ' (cd module-git && git log --format="o_fortuna 1.1 %H" -1 HEAD^^ && @@ -138,7 +135,7 @@ test_expect_success 'second update has correct .git/cvs-revisions' ' test_cmp expected module-git/.git/cvs-revisions ' -test_expect_success 'import from a CVS working tree' ' +test_expect_success PERL 'import from a CVS working tree' ' $CVS co -d import-from-wt module && cd import-from-wt && @@ -150,12 +147,12 @@ test_expect_success 'import from a CVS working tree' ' ' -test_expect_success 'no .git/cvs-revisions created by default' ' +test_expect_success PERL 'no .git/cvs-revisions created by default' ' ! 
test -e import-from-wt/.git/cvs-revisions ' -test_expect_success 'test entire HEAD' 'test_cmp_branch_tree master' +test_expect_success PERL 'test entire HEAD' 'test_cmp_branch_tree master' test_done diff --git a/t/t9601-cvsimport-vendor-branch.sh b/t/t9601-cvsimport-vendor-branch.sh index 3afaf56526..827d39f5bf 100755 --- a/t/t9601-cvsimport-vendor-branch.sh +++ b/t/t9601-cvsimport-vendor-branch.sh @@ -34,50 +34,49 @@ test_description='git cvsimport handling of vendor branches' . ./lib-cvs.sh -CVSROOT="$TEST_DIRECTORY"/t9601/cvsroot -export CVSROOT +setup_cvs_test_repository t9601 -test_expect_success 'import a module with a vendor branch' ' +test_expect_success PERL 'import a module with a vendor branch' ' git cvsimport -C module-git module ' -test_expect_success 'check HEAD out of cvs repository' 'test_cvs_co master' +test_expect_success PERL 'check HEAD out of cvs repository' 'test_cvs_co master' -test_expect_success 'check master out of git repository' 'test_git_co master' +test_expect_success PERL 'check master out of git repository' 'test_git_co master' -test_expect_success 'check a file that was imported once' ' +test_expect_success PERL 'check a file that was imported once' ' test_cmp_branch_file master imported-once.txt ' -test_expect_failure 'check a file that was imported twice' ' +test_expect_failure PERL 'check a file that was imported twice' ' test_cmp_branch_file master imported-twice.txt ' -test_expect_success 'check a file that was imported then modified on HEAD' ' +test_expect_success PERL 'check a file that was imported then modified on HEAD' ' test_cmp_branch_file master imported-modified.txt ' -test_expect_success 'check a file that was imported, modified, then imported again' ' +test_expect_success PERL 'check a file that was imported, modified, then imported again' ' test_cmp_branch_file master imported-modified-imported.txt ' -test_expect_success 'check a file that was added to HEAD then imported' ' +test_expect_success PERL 'check a file 
that was added to HEAD then imported' ' test_cmp_branch_file master added-imported.txt ' -test_expect_success 'a vendor branch whose tag has been removed' ' +test_expect_success PERL 'a vendor branch whose tag has been removed' ' test_cmp_branch_file master imported-anonymously.txt diff --git a/t/t9602-cvsimport-branches-tags.sh b/t/t9602-cvsimport-branches-tags.sh index 67878b2d0c..e1db323f54 100755 --- a/t/t9602-cvsimport-branches-tags.sh +++ b/t/t9602-cvsimport-branches-tags.sh @@ -6,70 +6,69 @@ test_description='git cvsimport handling of branches and tags' . ./lib-cvs.sh -CVSROOT="$TEST_DIRECTORY"/t9602/cvsroot -export CVSROOT +setup_cvs_test_repository t9602 -test_expect_success 'import module' ' +test_expect_success PERL 'import module' ' git cvsimport -C module-git module ' -test_expect_success 'test branch master' ' +test_expect_success PERL 'test branch master' ' test_cmp_branch_tree master ' -test_expect_success 'test branch vendorbranch' ' +test_expect_success PERL 'test branch vendorbranch' ' test_cmp_branch_tree vendorbranch ' -test_expect_failure 'test branch B_FROM_INITIALS' ' +test_expect_failure PERL 'test branch B_FROM_INITIALS' ' test_cmp_branch_tree B_FROM_INITIALS ' -test_expect_failure 'test branch B_FROM_INITIALS_BUT_ONE' ' +test_expect_failure PERL 'test branch B_FROM_INITIALS_BUT_ONE' ' test_cmp_branch_tree B_FROM_INITIALS_BUT_ONE ' -test_expect_failure 'test branch B_MIXED' ' +test_expect_failure PERL 'test branch B_MIXED' ' test_cmp_branch_tree B_MIXED ' -test_expect_success 'test branch B_SPLIT' ' +test_expect_success PERL 'test branch B_SPLIT' ' test_cmp_branch_tree B_SPLIT ' -test_expect_failure 'test tag vendortag' ' +test_expect_failure PERL 'test tag vendortag' ' test_cmp_branch_tree vendortag ' -test_expect_success 'test tag T_ALL_INITIAL_FILES' ' +test_expect_success PERL 'test tag T_ALL_INITIAL_FILES' ' test_cmp_branch_tree T_ALL_INITIAL_FILES ' -test_expect_failure 'test tag T_ALL_INITIAL_FILES_BUT_ONE' ' +test_expect_failure 
PERL 'test tag T_ALL_INITIAL_FILES_BUT_ONE' ' test_cmp_branch_tree T_ALL_INITIAL_FILES_BUT_ONE ' -test_expect_failure 'test tag T_MIXED' ' +test_expect_failure PERL 'test tag T_MIXED' ' test_cmp_branch_tree T_MIXED diff --git a/t/t9603-cvsimport-patchsets.sh b/t/t9603-cvsimport-patchsets.sh index 958bdce4dd..93c4fa885e 100755 --- a/t/t9603-cvsimport-patchsets.sh +++ b/t/t9603-cvsimport-patchsets.sh @@ -14,8 +14,7 @@ test_description='git cvsimport testing for correct patchset estimation' . ./lib-cvs.sh -CVSROOT="$TEST_DIRECTORY"/t9603/cvsroot -export CVSROOT +setup_cvs_test_repository t9603 test_expect_failure 'import with criss cross times on revisions' ' diff --git a/t/test-lib.sh b/t/test-lib.sh index 29fd7209cf..dff5e25ae6 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -331,12 +331,35 @@ test_set_prereq () { satisfied=" " test_have_prereq () { - case $satisfied in - *" $1 "*) - : yes, have it ;; - *) - ! : nope ;; - esac + # prerequisites can be concatenated with ',' + save_IFS=$IFS + IFS=, + set -- $* + IFS=$save_IFS + + total_prereq=0 + ok_prereq=0 + missing_prereq= + + for prerequisite + do + total_prereq=$(($total_prereq + 1)) + case $satisfied in + *" $prerequisite "*) + ok_prereq=$(($ok_prereq + 1)) + ;; + *) + # Keep a list of missing prerequisites + if test -z "$missing_prereq" + then + missing_prereq=$prerequisite + else + missing_prereq="$prerequisite,$missing_prereq" + fi + esac + done + + test $total_prereq = $ok_prereq } # You are not expected to call test_ok_ and test_failure_ directly, use @@ -398,8 +421,14 @@ test_skip () { fi case "$to_skip" in t) + of_prereq= + if test "$missing_prereq" != "$prereq" + then + of_prereq=" of $prereq" + fi + say_color skip >&3 "skipping test: $@" - say_color skip "ok $test_count # skip $1" + say_color skip "ok $test_count # skip $1 (missing $missing_prereq${of_prereq})" : true ;; *) @@ -545,6 +574,38 @@ test_external_without_stderr () { fi } +# debugging-friendly alternatives to "test [-f|-d|-e]" +# The 
commands test the existence or non-existence of $1. $2 can be +# given to provide a more precise diagnosis. +test_path_is_file () { + if ! [ -f "$1" ] + then + echo "File $1 doesn't exist. $*" + false + fi +} + +test_path_is_dir () { + if ! [ -d "$1" ] + then + echo "Directory $1 doesn't exist. $*" + false + fi +} + +test_path_is_missing () { + if [ -e "$1" ] + then + echo "Path exists:" + ls -ld "$1" + if [ $# -ge 1 ]; then + echo "$*" + fi + false + fi +} + + # This is not among top-level (test_expect_success | test_expect_failure) # but is a prefix that can be used in the test script, like: # @@ -625,28 +686,31 @@ test_when_finished () { test_create_repo () { test "$#" = 1 || error "bug in the test script: not 1 parameter to test-create-repo" - owd=`pwd` repo="$1" mkdir -p "$repo" - cd "$repo" || error "Cannot setup test environment" - "$GIT_EXEC_PATH/git-init" "--template=$TEST_DIRECTORY/../templates/blt/" >&3 2>&4 || - error "cannot run git init -- have you built things yet?" - mv .git/hooks .git/hooks-disabled - cd "$owd" + ( + cd "$repo" || error "Cannot setup test environment" + "$GIT_EXEC_PATH/git-init" "--template=$GIT_BUILD_DIR/templates/blt/" >&3 2>&4 || + error "cannot run git init -- have you built things yet?" 
+ mv .git/hooks .git/hooks-disabled + ) || exit } test_done () { GIT_EXIT_OK=t - test_results_dir="$TEST_DIRECTORY/test-results" - mkdir -p "$test_results_dir" - test_results_path="$test_results_dir/${0%.sh}-$$.counts" - echo "total $test_count" >> $test_results_path - echo "success $test_success" >> $test_results_path - echo "fixed $test_fixed" >> $test_results_path - echo "broken $test_broken" >> $test_results_path - echo "failed $test_failure" >> $test_results_path - echo "" >> $test_results_path + if test -z "$HARNESS_ACTIVE"; then + test_results_dir="$TEST_DIRECTORY/test-results" + mkdir -p "$test_results_dir" + test_results_path="$test_results_dir/${0%.sh}-$$.counts" + + echo "total $test_count" >> $test_results_path + echo "success $test_success" >> $test_results_path + echo "fixed $test_fixed" >> $test_results_path + echo "broken $test_broken" >> $test_results_path + echo "failed $test_failure" >> $test_results_path + echo "" >> $test_results_path + fi if test "$test_fixed" != 0 then @@ -688,7 +752,15 @@ test_done () { # Test the binaries we have just built. The tests are kept in # t/ subdirectory and are run in 'trash directory' subdirectory. -TEST_DIRECTORY=$(pwd) +if test -z "$TEST_DIRECTORY" +then + # We allow tests to override this, in case they want to run tests + # outside of t/, e.g. for running tests on the test library + # itself. + TEST_DIRECTORY=$(pwd) +fi +GIT_BUILD_DIR="$TEST_DIRECTORY"/.. + if test -n "$valgrind" then make_symlink () { @@ -715,7 +787,7 @@ then test -x "$1" || return base=$(basename "$1") - symlink_target=$TEST_DIRECTORY/../$base + symlink_target=$GIT_BUILD_DIR/$base # do not override scripts if test -x "$symlink_target" && test ! -d "$symlink_target" && @@ -734,7 +806,7 @@ then # override all git executables in TEST_DIRECTORY/.. 
GIT_VALGRIND=$TEST_DIRECTORY/valgrind mkdir -p "$GIT_VALGRIND"/bin - for file in $TEST_DIRECTORY/../git* $TEST_DIRECTORY/../test-* + for file in $GIT_BUILD_DIR/git* $GIT_BUILD_DIR/test-* do make_valgrind_symlink $file done @@ -755,10 +827,10 @@ then elif test -n "$GIT_TEST_INSTALLED" ; then GIT_EXEC_PATH=$($GIT_TEST_INSTALLED/git --exec-path) || error "Cannot run git from $GIT_TEST_INSTALLED." - PATH=$GIT_TEST_INSTALLED:$TEST_DIRECTORY/..:$PATH + PATH=$GIT_TEST_INSTALLED:$GIT_BUILD_DIR:$PATH GIT_EXEC_PATH=${GIT_TEST_EXEC_PATH:-$GIT_EXEC_PATH} else # normal case, use ../bin-wrappers only unless $with_dashes: - git_bin_dir="$TEST_DIRECTORY/../bin-wrappers" + git_bin_dir="$GIT_BUILD_DIR/bin-wrappers" if ! test -x "$git_bin_dir/git" ; then if test -z "$with_dashes" ; then say "$git_bin_dir/git is not executable; using GIT_EXEC_PATH" @@ -766,18 +838,18 @@ else # normal case, use ../bin-wrappers only unless $with_dashes: with_dashes=t fi PATH="$git_bin_dir:$PATH" - GIT_EXEC_PATH=$TEST_DIRECTORY/.. + GIT_EXEC_PATH=$GIT_BUILD_DIR if test -n "$with_dashes" ; then - PATH="$TEST_DIRECTORY/..:$PATH" + PATH="$GIT_BUILD_DIR:$PATH" fi fi -GIT_TEMPLATE_DIR=$(pwd)/../templates/blt +GIT_TEMPLATE_DIR="$GIT_BUILD_DIR"/templates/blt unset GIT_CONFIG GIT_CONFIG_NOSYSTEM=1 GIT_CONFIG_NOGLOBAL=1 export PATH GIT_EXEC_PATH GIT_TEMPLATE_DIR GIT_CONFIG_NOSYSTEM GIT_CONFIG_NOGLOBAL -. ../GIT-BUILD-OPTIONS +. "$GIT_BUILD_DIR"/GIT-BUILD-OPTIONS if test -z "$GIT_TEST_CMP" then @@ -789,22 +861,22 @@ then fi fi -GITPERLLIB=$(pwd)/../perl/blib/lib:$(pwd)/../perl/blib/arch/auto/Git +GITPERLLIB="$GIT_BUILD_DIR"/perl/blib/lib:"$GIT_BUILD_DIR"/perl/blib/arch/auto/Git export GITPERLLIB -test -d ../templates/blt || { +test -d "$GIT_BUILD_DIR"/templates/blt || { error "You haven't built things yet, have you?" 
} if test -z "$GIT_TEST_INSTALLED" && test -z "$NO_PYTHON" then - GITPYTHONLIB="$(pwd)/../git_remote_helpers/build/lib" + GITPYTHONLIB="$GIT_BUILD_DIR/git_remote_helpers/build/lib" export GITPYTHONLIB - test -d ../git_remote_helpers/build || { + test -d "$GIT_BUILD_DIR"/git_remote_helpers/build || { error "You haven't built git_remote_helpers yet, have you?" } fi -if ! test -x ../test-chmtime; then +if ! test -x "$GIT_BUILD_DIR"/test-chmtime; then echo >&2 'You need to build test-chmtime:' echo >&2 'Run "make test-chmtime" in the source (toplevel) directory' exit 1 @@ -829,6 +901,9 @@ test_create_repo "$test" # in subprocesses like git equals our $PWD (for pathname comparisons). cd -P "$test" || exit 1 +HOME=$(pwd) +export HOME + this_test=${0##*/} this_test=${this_test%%-*} for skp in $GIT_SKIP_TESTS @@ -890,3 +965,7 @@ test -z "$NO_PYTHON" && test_set_prereq PYTHON # test whether the filesystem supports symbolic links ln -s x y 2>/dev/null && test -h y 2>/dev/null && test_set_prereq SYMLINKS rm -f y + +# When the tests are run as root, permission tests will report that +# things are writable when they shouldn't be. +test -w / || test_set_prereq SANITY diff --git a/test-line-buffer.c b/test-line-buffer.c new file mode 100644 index 0000000000..c11bf7f967 --- /dev/null +++ b/test-line-buffer.c @@ -0,0 +1,46 @@ +/* + * test-line-buffer.c: code to exercise the svn importer's input helper + * + * Input format: + * number NL + * (number bytes) NL + * number NL + * ... 
+ */ + +#include "git-compat-util.h" +#include "vcs-svn/line_buffer.h" + +static uint32_t strtouint32(const char *s) +{ + char *end; + uintmax_t n = strtoumax(s, &end, 10); + if (*s == '\0' || *end != '\0') + die("invalid count: %s", s); + return (uint32_t) n; +} + +int main(int argc, char *argv[]) +{ + char *s; + + if (argc != 1) + usage("test-line-buffer < input.txt"); + if (buffer_init(NULL)) + die_errno("open error"); + while ((s = buffer_read_line())) { + s = buffer_read_string(strtouint32(s)); + fputs(s, stdout); + fputc('\n', stdout); + buffer_skip_bytes(1); + if (!(s = buffer_read_line())) + break; + buffer_copy_bytes(strtouint32(s) + 1); + } + if (buffer_deinit()) + die("input error"); + if (ferror(stdout)) + die("output error"); + buffer_reset(); + return 0; +} diff --git a/test-obj-pool.c b/test-obj-pool.c new file mode 100644 index 0000000000..5018863ef5 --- /dev/null +++ b/test-obj-pool.c @@ -0,0 +1,116 @@ +/* + * test-obj-pool.c: code to exercise the svn importer's object pool + */ + +#include "cache.h" +#include "vcs-svn/obj_pool.h" + +enum pool { POOL_ONE, POOL_TWO }; +obj_pool_gen(one, int, 1) +obj_pool_gen(two, int, 4096) + +static uint32_t strtouint32(const char *s) +{ + char *end; + uintmax_t n = strtoumax(s, &end, 10); + if (*s == '\0' || (*end != '\n' && *end != '\0')) + die("invalid offset: %s", s); + return (uint32_t) n; +} + +static void handle_command(const char *command, enum pool pool, const char *arg) +{ + switch (*command) { + case 'a': + if (!prefixcmp(command, "alloc ")) { + uint32_t n = strtouint32(arg); + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_alloc(n) : two_alloc(n)); + return; + } + case 'c': + if (!prefixcmp(command, "commit ")) { + pool == POOL_ONE ? one_commit() : two_commit(); + return; + } + if (!prefixcmp(command, "committed ")) { + printf("%"PRIu32"\n", + pool == POOL_ONE ? 
+ one_pool.committed : two_pool.committed); + return; + } + case 'f': + if (!prefixcmp(command, "free ")) { + uint32_t n = strtouint32(arg); + pool == POOL_ONE ? one_free(n) : two_free(n); + return; + } + case 'n': + if (!prefixcmp(command, "null ")) { + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_offset(NULL) : two_offset(NULL)); + return; + } + case 'o': + if (!prefixcmp(command, "offset ")) { + uint32_t n = strtouint32(arg); + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_offset(one_pointer(n)) : + two_offset(two_pointer(n))); + return; + } + case 'r': + if (!prefixcmp(command, "reset ")) { + pool == POOL_ONE ? one_reset() : two_reset(); + return; + } + case 's': + if (!prefixcmp(command, "set ")) { + uint32_t n = strtouint32(arg); + if (pool == POOL_ONE) + *one_pointer(n) = 1; + else + *two_pointer(n) = 1; + return; + } + case 't': + if (!prefixcmp(command, "test ")) { + uint32_t n = strtouint32(arg); + printf("%d\n", pool == POOL_ONE ? + *one_pointer(n) : *two_pointer(n)); + return; + } + default: + die("unrecognized command: %s", command); + } +} + +static void handle_line(const char *line) +{ + const char *arg = strchr(line, ' '); + enum pool pool; + + if (arg && !prefixcmp(arg + 1, "one")) + pool = POOL_ONE; + else if (arg && !prefixcmp(arg + 1, "two")) + pool = POOL_TWO; + else + die("no pool specified: %s", line); + + handle_command(line, pool, arg + strlen("one ")); +} + +int main(int argc, char *argv[]) +{ + struct strbuf sb = STRBUF_INIT; + if (argc != 1) + usage("test-obj-str < script"); + + while (strbuf_getline(&sb, stdin, '\n') != EOF) + handle_line(sb.buf); + strbuf_release(&sb); + return 0; +} diff --git a/test-string-pool.c b/test-string-pool.c new file mode 100644 index 0000000000..c5782e6bce --- /dev/null +++ b/test-string-pool.c @@ -0,0 +1,31 @@ +/* + * test-string-pool.c: code to exercise the svn importer's string pool + */ + +#include "git-compat-util.h" +#include "vcs-svn/string_pool.h" + +int main(int argc, char *argv[]) +{ + 
const uint32_t unequal = pool_intern("does not equal"); + const uint32_t equal = pool_intern("equals"); + uint32_t buf[3]; + uint32_t n; + + if (argc != 2) + usage("test-string-pool <string>,<string>"); + + n = pool_tok_seq(3, buf, ",-", argv[1]); + if (n >= 3) + die("too many strings"); + if (n <= 1) + die("too few strings"); + + buf[2] = buf[1]; + buf[1] = (buf[0] == buf[2]) ? equal : unequal; + pool_print_seq(3, buf, ' ', stdout); + fputc('\n', stdout); + + pool_reset(); + return 0; +} diff --git a/test-svn-fe.c b/test-svn-fe.c new file mode 100644 index 0000000000..77cf78abcf --- /dev/null +++ b/test-svn-fe.c @@ -0,0 +1,17 @@ +/* + * test-svn-fe: Code to exercise the svn import lib + */ + +#include "git-compat-util.h" +#include "vcs-svn/svndump.h" + +int main(int argc, char *argv[]) +{ + if (argc != 2) + usage("test-svn-fe <file>"); + svndump_init(argv[1]); + svndump_read(NULL); + svndump_deinit(); + svndump_reset(); + return 0; +} diff --git a/test-treap.c b/test-treap.c new file mode 100644 index 0000000000..cdba5111e1 --- /dev/null +++ b/test-treap.c @@ -0,0 +1,65 @@ +/* + * test-treap.c: code to exercise the svn importer's treap structure + */ + +#include "cache.h" +#include "vcs-svn/obj_pool.h" +#include "vcs-svn/trp.h" + +struct int_node { + uintmax_t n; + struct trp_node children; +}; + +obj_pool_gen(node, struct int_node, 3) + +static int node_cmp(struct int_node *a, struct int_node *b) +{ + return (a->n > b->n) - (a->n < b->n); +} + +trp_gen(static, treap_, struct int_node, children, node, node_cmp) + +static void strtonode(struct int_node *item, const char *s) +{ + char *end; + item->n = strtoumax(s, &end, 10); + if (*s == '\0' || (*end != '\n' && *end != '\0')) + die("invalid integer: %s", s); +} + +int main(int argc, char *argv[]) +{ + struct strbuf sb = STRBUF_INIT; + struct trp_root root = { ~0 }; + uint32_t item; + + if (argc != 1) + usage("test-treap < ints"); + + while (strbuf_getline(&sb, stdin, '\n') != EOF) { + item = node_alloc(1); + 
strtonode(node_pointer(item), sb.buf); + treap_insert(&root, node_pointer(item)); + } + + item = node_offset(treap_first(&root)); + while (~item) { + uint32_t next; + struct int_node *tmp = node_pointer(node_alloc(1)); + + tmp->n = node_pointer(item)->n; + next = node_offset(treap_next(&root, node_pointer(item))); + + treap_remove(&root, node_pointer(item)); + item = node_offset(treap_nsearch(&root, tmp)); + + if (item != next && (!~item || node_pointer(item)->n != tmp->n)) + die("found %"PRIuMAX" in place of %"PRIuMAX"", + ~item ? node_pointer(item)->n : ~(uintmax_t) 0, + ~next ? node_pointer(next)->n : ~(uintmax_t) 0); + printf("%"PRIuMAX"\n", tmp->n); + } + node_reset(); + return 0; +} diff --git a/transport-helper.c b/transport-helper.c index 191fbf798a..acfc88e3f1 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -689,7 +689,7 @@ static int push_refs_with_export(struct transport *transport, struct child_process *helper, exporter; struct helper_data *data = transport->data; char *export_marks = NULL, *import_marks = NULL; - struct string_list revlist_args = { NULL, 0, 0 }; + struct string_list revlist_args = STRING_LIST_INIT_NODUP; struct strbuf buf = STRBUF_INIT; helper = get_helper(transport); diff --git a/tree-walk.c b/tree-walk.c index 67a9a0c5a5..a9bbf4e235 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -1,5 +1,6 @@ #include "cache.h" #include "tree-walk.h" +#include "unpack-trees.h" #include "tree.h" static const char *get_mode(const char *str, unsigned int *modep) @@ -310,6 +311,7 @@ static void free_extended_entry(struct tree_desc_x *t) int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) { int ret = 0; + int error = 0; struct name_entry *entry = xmalloc(n*sizeof(*entry)); int i; struct tree_desc_x *tx = xcalloc(n, sizeof(*tx)); @@ -377,8 +379,11 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) if (!mask) break; ret = info->fn(n, mask, dirmask, entry, info); - if (ret < 0) - break; + if (ret < 
0) { + error = ret; + if (!info->show_all_errors) + break; + } mask &= ret; ret = 0; for (i = 0; i < n; i++) @@ -389,7 +394,7 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info) for (i = 0; i < n; i++) free_extended_entry(tx + i); free(tx); - return ret; + return error; } static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode) diff --git a/tree-walk.h b/tree-walk.h index f78361a676..7e3e0b5ad1 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -48,6 +48,7 @@ struct traverse_info { unsigned long conflicts; traverse_callback_t fn; void *data; + int show_all_errors; }; int get_tree_entry(const unsigned char *, const char *, unsigned char *, unsigned *); diff --git a/unpack-trees.c b/unpack-trees.c index f561d88156..803445aa7b 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -13,37 +13,90 @@ * Error messages expected by scripts out of plumbing commands such as * read-tree. Non-scripted Porcelain is not required to use these messages * and in fact are encouraged to reword them to better suit their particular - * situation better. See how "git checkout" replaces not_uptodate_file to - * explain why it does not allow switching between branches when you have - * local changes, for example. + * situation better. See how "git checkout" and "git merge" replaces + * them using setup_unpack_trees_porcelain(), for example. */ -static struct unpack_trees_error_msgs unpack_plumbing_errors = { - /* would_overwrite */ +const char *unpack_plumbing_errors[NB_UNPACK_TREES_ERROR_TYPES] = { + /* ERROR_WOULD_OVERWRITE */ "Entry '%s' would be overwritten by merge. Cannot merge.", - /* not_uptodate_file */ + /* ERROR_NOT_UPTODATE_FILE */ "Entry '%s' not uptodate. 
Cannot merge.", - /* not_uptodate_dir */ + /* ERROR_NOT_UPTODATE_DIR */ "Updating '%s' would lose untracked files in it", - /* would_lose_untracked */ - "Untracked working tree file '%s' would be %s by merge.", + /* ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN */ + "Untracked working tree file '%s' would be overwritten by merge.", - /* bind_overlap */ + /* ERROR_WOULD_LOSE_UNTRACKED_REMOVED */ + "Untracked working tree file '%s' would be removed by merge.", + + /* ERROR_BIND_OVERLAP */ "Entry '%s' overlaps with '%s'. Cannot bind.", - /* sparse_not_uptodate_file */ + /* ERROR_SPARSE_NOT_UPTODATE_FILE */ "Entry '%s' not uptodate. Cannot update sparse checkout.", - /* would_lose_orphaned */ - "Working tree file '%s' would be %s by sparse checkout update.", + /* ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN */ + "Working tree file '%s' would be overwritten by sparse checkout update.", + + /* ERROR_WOULD_LOSE_ORPHANED_REMOVED */ + "Working tree file '%s' would be removed by sparse checkout update.", }; -#define ERRORMSG(o,fld) \ - ( ((o) && (o)->msgs.fld) \ - ? ((o)->msgs.fld) \ - : (unpack_plumbing_errors.fld) ) +#define ERRORMSG(o,type) \ + ( ((o) && (o)->msgs[(type)]) \ + ? ((o)->msgs[(type)]) \ + : (unpack_plumbing_errors[(type)]) ) + +void setup_unpack_trees_porcelain(struct unpack_trees_options *opts, + const char *cmd) +{ + const char **msgs = opts->msgs; + const char *msg; + char *tmp; + const char *cmd2 = strcmp(cmd, "checkout") ? 
cmd : "switch branches"; + if (advice_commit_before_merge) + msg = "Your local changes to the following files would be overwritten by %s:\n%%s" + "Please, commit your changes or stash them before you can %s."; + else + msg = "Your local changes to the following files would be overwritten by %s:\n%%s"; + tmp = xmalloc(strlen(msg) + strlen(cmd) + strlen(cmd2) - 2); + sprintf(tmp, msg, cmd, cmd2); + msgs[ERROR_WOULD_OVERWRITE] = tmp; + msgs[ERROR_NOT_UPTODATE_FILE] = tmp; + + msgs[ERROR_NOT_UPTODATE_DIR] = + "Updating the following directories would lose untracked files in it:\n%s"; + + if (advice_commit_before_merge) + msg = "The following untracked working tree files would be %s by %s:\n%%s" + "Please move or remove them before you can %s."; + else + msg = "The following untracked working tree files would be %s by %s:\n%%s"; + tmp = xmalloc(strlen(msg) + strlen(cmd) + strlen("removed") + strlen(cmd2) - 4); + sprintf(tmp, msg, "removed", cmd, cmd2); + msgs[ERROR_WOULD_LOSE_UNTRACKED_REMOVED] = tmp; + tmp = xmalloc(strlen(msg) + strlen(cmd) + strlen("overwritten") + strlen(cmd2) - 4); + sprintf(tmp, msg, "overwritten", cmd, cmd2); + msgs[ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN] = tmp; + + /* + * Special case: ERROR_BIND_OVERLAP refers to a pair of paths, we + * cannot easily display it as a list. + */ + msgs[ERROR_BIND_OVERLAP] = "Entry '%s' overlaps with '%s'. 
Cannot bind."; + + msgs[ERROR_SPARSE_NOT_UPTODATE_FILE] = + "Cannot update sparse checkout: the following entries are not up-to-date:\n%s"; + msgs[ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN] = + "The following Working tree files would be overwritten by sparse checkout update:\n%s"; + msgs[ERROR_WOULD_LOSE_ORPHANED_REMOVED] = + "The following Working tree files would be removed by sparse checkout update:\n%s"; + + opts->show_all_errors = 1; +} static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce, unsigned int set, unsigned int clear) @@ -53,6 +106,9 @@ static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce, clear |= CE_HASHED | CE_UNHASHED; + if (set & CE_REMOVE) + set |= CE_WT_REMOVE; + memcpy(new, ce, size); new->next = NULL; new->ce_flags = (new->ce_flags & ~clear) | set; @@ -60,6 +116,67 @@ static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce, } /* + * add error messages on path <path> + * corresponding to the type <e> with the message <msg> + * indicating if it should be display in porcelain or not + */ +static int add_rejected_path(struct unpack_trees_options *o, + enum unpack_trees_error_types e, + const char *path) +{ + struct rejected_paths_list *newentry; + if (!o->show_all_errors) + return error(ERRORMSG(o, e), path); + + /* + * Otherwise, insert in a list for future display by + * display_error_msgs() + */ + newentry = xmalloc(sizeof(struct rejected_paths_list)); + newentry->path = (char *)path; + newentry->next = o->unpack_rejects[e]; + o->unpack_rejects[e] = newentry; + return -1; +} + +/* + * free all the structures allocated for the error <e> + */ +static void free_rejected_paths(struct unpack_trees_options *o, + enum unpack_trees_error_types e) +{ + while (o->unpack_rejects[e]) { + struct rejected_paths_list *del = o->unpack_rejects[e]; + o->unpack_rejects[e] = o->unpack_rejects[e]->next; + free(del); + } + free(o->unpack_rejects[e]); +} + +/* + * display all the error messages stored 
in a nice way + */ +static void display_error_msgs(struct unpack_trees_options *o) +{ + int e; + int something_displayed = 0; + for (e = 0; e < NB_UNPACK_TREES_ERROR_TYPES; e++) { + if (o->unpack_rejects[e]) { + struct rejected_paths_list *rp; + struct strbuf path = STRBUF_INIT; + something_displayed = 1; + for (rp = o->unpack_rejects[e]; rp; rp = rp->next) + strbuf_addf(&path, "\t%s\n", rp->path); + error(ERRORMSG(o, e), path.buf); + strbuf_release(&path); + free_rejected_paths(o, e); + } + } + if (something_displayed) + printf("Aborting\n"); +} + +/* * Unlink the last component and schedule the leading directories for * removal, such that empty directories get removed. */ @@ -84,7 +201,7 @@ static int check_updates(struct unpack_trees_options *o) if (o->update && o->verbose_update) { for (total = cnt = 0; cnt < index->cache_nr; cnt++) { struct cache_entry *ce = index->cache[cnt]; - if (ce->ce_flags & (CE_UPDATE | CE_REMOVE | CE_WT_REMOVE)) + if (ce->ce_flags & (CE_UPDATE | CE_WT_REMOVE)) total++; } @@ -104,12 +221,6 @@ static int check_updates(struct unpack_trees_options *o) unlink_entry(ce); continue; } - - if (ce->ce_flags & CE_REMOVE) { - display_progress(progress, ++cnt); - if (o->update) - unlink_entry(ce); - } } remove_marked_cache_entries(&o->result); remove_scheduled_dirs(); @@ -132,15 +243,12 @@ static int check_updates(struct unpack_trees_options *o) } static int verify_uptodate_sparse(struct cache_entry *ce, struct unpack_trees_options *o); -static int verify_absent_sparse(struct cache_entry *ce, const char *action, struct unpack_trees_options *o); +static int verify_absent_sparse(struct cache_entry *ce, enum unpack_trees_error_types, struct unpack_trees_options *o); static int will_have_skip_worktree(const struct cache_entry *ce, struct unpack_trees_options *o) { const char *basename; - if (ce_stage(ce)) - return 0; - basename = strrchr(ce->name, '/'); basename = basename ? 
basename+1 : ce->name; return excluded_from_list(ce->name, ce_namelen(ce), basename, NULL, o->el) <= 0; @@ -150,19 +258,36 @@ static int apply_sparse_checkout(struct cache_entry *ce, struct unpack_trees_opt { int was_skip_worktree = ce_skip_worktree(ce); - if (will_have_skip_worktree(ce, o)) + if (!ce_stage(ce) && will_have_skip_worktree(ce, o)) ce->ce_flags |= CE_SKIP_WORKTREE; else ce->ce_flags &= ~CE_SKIP_WORKTREE; /* - * We only care about files getting into the checkout area - * If merge strategies want to remove some, go ahead, this - * flag will be removed eventually in unpack_trees() if it's - * outside checkout area. + * if (!was_skip_worktree && !ce_skip_worktree()) { + * This is perfectly normal. Move on; + * } */ - if (ce->ce_flags & CE_REMOVE) - return 0; + + /* + * Merge strategies may set CE_UPDATE|CE_REMOVE outside checkout + * area as a result of ce_skip_worktree() shortcuts in + * verify_absent() and verify_uptodate(). + * Make sure they don't modify worktree if they are already + * outside checkout area + */ + if (was_skip_worktree && ce_skip_worktree(ce)) { + ce->ce_flags &= ~CE_UPDATE; + + /* + * By default, when CE_REMOVE is on, CE_WT_REMOVE is also + * on to get that file removed from both index and worktree. + * If that file is already outside worktree area, don't + * bother remove it. 
+ */ + if (ce->ce_flags & CE_REMOVE) + ce->ce_flags &= ~CE_WT_REMOVE; + } if (!was_skip_worktree && ce_skip_worktree(ce)) { /* @@ -175,7 +300,7 @@ static int apply_sparse_checkout(struct cache_entry *ce, struct unpack_trees_opt ce->ce_flags |= CE_WT_REMOVE; } if (was_skip_worktree && !ce_skip_worktree(ce)) { - if (verify_absent_sparse(ce, "overwritten", o)) + if (verify_absent_sparse(ce, ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, o)) return -1; ce->ce_flags |= CE_UPDATE; } @@ -755,6 +880,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options setup_traverse_info(&info, prefix); info.fn = unpack_callback; info.data = o; + info.show_all_errors = o->show_all_errors; if (o->prefix) { /* @@ -803,14 +929,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options ret = -1; goto done; } - /* - * Merge strategies may set CE_UPDATE|CE_REMOVE outside checkout - * area as a result of ce_skip_worktree() shortcuts in - * verify_absent() and verify_uptodate(). Clear them. 
- */ - if (ce_skip_worktree(ce)) - ce->ce_flags &= ~(CE_UPDATE | CE_REMOVE); - else + if (!ce_skip_worktree(ce)) empty_worktree = 0; } @@ -834,6 +953,8 @@ done: return ret; return_failed: + if (o->show_all_errors) + display_error_msgs(o); mark_all_ce_unused(o->src_index); ret = unpack_failed(o, NULL); goto done; @@ -843,7 +964,7 @@ return_failed: static int reject_merge(struct cache_entry *ce, struct unpack_trees_options *o) { - return error(ERRORMSG(o, would_overwrite), ce->name); + return add_rejected_path(o, ERROR_WOULD_OVERWRITE, ce->name); } static int same(struct cache_entry *a, struct cache_entry *b) @@ -865,7 +986,7 @@ static int same(struct cache_entry *a, struct cache_entry *b) */ static int verify_uptodate_1(struct cache_entry *ce, struct unpack_trees_options *o, - const char *error_msg) + enum unpack_trees_error_types error_type) { struct stat st; @@ -890,7 +1011,7 @@ static int verify_uptodate_1(struct cache_entry *ce, if (errno == ENOENT) return 0; return o->gently ? -1 : - error(error_msg, ce->name); + add_rejected_path(o, error_type, ce->name); } static int verify_uptodate(struct cache_entry *ce, @@ -898,13 +1019,13 @@ static int verify_uptodate(struct cache_entry *ce, { if (!o->skip_sparse_checkout && will_have_skip_worktree(ce, o)) return 0; - return verify_uptodate_1(ce, o, ERRORMSG(o, not_uptodate_file)); + return verify_uptodate_1(ce, o, ERROR_NOT_UPTODATE_FILE); } static int verify_uptodate_sparse(struct cache_entry *ce, struct unpack_trees_options *o) { - return verify_uptodate_1(ce, o, ERRORMSG(o, sparse_not_uptodate_file)); + return verify_uptodate_1(ce, o, ERROR_SPARSE_NOT_UPTODATE_FILE); } static void invalidate_ce_path(struct cache_entry *ce, struct unpack_trees_options *o) @@ -920,13 +1041,15 @@ static void invalidate_ce_path(struct cache_entry *ce, struct unpack_trees_optio * Currently, git does not checkout subprojects during a superproject * checkout, so it is not going to overwrite anything. 
*/ -static int verify_clean_submodule(struct cache_entry *ce, const char *action, +static int verify_clean_submodule(struct cache_entry *ce, + enum unpack_trees_error_types error_type, struct unpack_trees_options *o) { return 0; } -static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, +static int verify_clean_subdirectory(struct cache_entry *ce, + enum unpack_trees_error_types error_type, struct unpack_trees_options *o) { /* @@ -947,7 +1070,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, */ if (!hashcmp(sha1, ce->sha1)) return 0; - return verify_clean_submodule(ce, action, o); + return verify_clean_submodule(ce, error_type, o); } /* @@ -991,7 +1114,7 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, i = read_directory(&d, pathbuf, namelen+1, NULL); if (i) return o->gently ? -1 : - error(ERRORMSG(o, not_uptodate_dir), ce->name); + add_rejected_path(o, ERROR_NOT_UPTODATE_DIR, ce->name); free(pathbuf); return cnt; } @@ -1016,9 +1139,9 @@ static int icase_exists(struct unpack_trees_options *o, struct cache_entry *dst, * We do not want to remove or overwrite a working tree file that * is not tracked, unless it is ignored. */ -static int verify_absent_1(struct cache_entry *ce, const char *action, - struct unpack_trees_options *o, - const char *error_msg) +static int verify_absent_1(struct cache_entry *ce, + enum unpack_trees_error_types error_type, + struct unpack_trees_options *o) { struct stat st; @@ -1056,7 +1179,7 @@ static int verify_absent_1(struct cache_entry *ce, const char *action, * files that are in "foo/" we would lose * them. */ - if (verify_clean_subdirectory(ce, action, o) < 0) + if (verify_clean_subdirectory(ce, error_type, o) < 0) return -1; return 0; } @@ -1073,22 +1196,28 @@ static int verify_absent_1(struct cache_entry *ce, const char *action, } return o->gently ? 
-1 : - error(ERRORMSG(o, would_lose_untracked), ce->name, action); + add_rejected_path(o, error_type, ce->name); } return 0; } -static int verify_absent(struct cache_entry *ce, const char *action, +static int verify_absent(struct cache_entry *ce, + enum unpack_trees_error_types error_type, struct unpack_trees_options *o) { if (!o->skip_sparse_checkout && will_have_skip_worktree(ce, o)) return 0; - return verify_absent_1(ce, action, o, ERRORMSG(o, would_lose_untracked)); + return verify_absent_1(ce, error_type, o); } -static int verify_absent_sparse(struct cache_entry *ce, const char *action, +static int verify_absent_sparse(struct cache_entry *ce, + enum unpack_trees_error_types error_type, struct unpack_trees_options *o) { - return verify_absent_1(ce, action, o, ERRORMSG(o, would_lose_orphaned)); + enum unpack_trees_error_types orphaned_error = error_type; + if (orphaned_error == ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN) + orphaned_error = ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN; + + return verify_absent_1(ce, orphaned_error, o); } static int merged_entry(struct cache_entry *merge, struct cache_entry *old, @@ -1097,8 +1226,10 @@ static int merged_entry(struct cache_entry *merge, struct cache_entry *old, int update = CE_UPDATE; if (!old) { - if (verify_absent(merge, "overwritten", o)) + if (verify_absent(merge, ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, o)) return -1; + if (!o->skip_sparse_checkout && will_have_skip_worktree(merge, o)) + update |= CE_SKIP_WORKTREE; invalidate_ce_path(merge, o); } else if (!(old->ce_flags & CE_CONFLICTED)) { /* @@ -1135,7 +1266,7 @@ static int deleted_entry(struct cache_entry *ce, struct cache_entry *old, { /* Did it exist in the index? 
*/ if (!old) { - if (verify_absent(ce, "removed", o)) + if (verify_absent(ce, ERROR_WOULD_LOSE_UNTRACKED_REMOVED, o)) return -1; return 0; } @@ -1284,7 +1415,7 @@ int threeway_merge(struct cache_entry **stages, struct unpack_trees_options *o) if (index) return deleted_entry(index, index, o); if (ce && !head_deleted) { - if (verify_absent(ce, "removed", o)) + if (verify_absent(ce, ERROR_WOULD_LOSE_UNTRACKED_REMOVED, o)) return -1; } return 0; @@ -1417,7 +1548,7 @@ int bind_merge(struct cache_entry **src, o->merge_size); if (a && old) return o->gently ? -1 : - error(ERRORMSG(o, bind_overlap), a->name, old->name); + error(ERRORMSG(o, ERROR_BIND_OVERLAP), a->name, old->name); if (!a) return keep_entry(old, o); else diff --git a/unpack-trees.h b/unpack-trees.h index ef70eab390..7c0187d11a 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -9,14 +9,29 @@ struct exclude_list; typedef int (*merge_fn_t)(struct cache_entry **src, struct unpack_trees_options *options); -struct unpack_trees_error_msgs { - const char *would_overwrite; - const char *not_uptodate_file; - const char *not_uptodate_dir; - const char *would_lose_untracked; - const char *bind_overlap; - const char *sparse_not_uptodate_file; - const char *would_lose_orphaned; +enum unpack_trees_error_types { + ERROR_WOULD_OVERWRITE = 0, + ERROR_NOT_UPTODATE_FILE, + ERROR_NOT_UPTODATE_DIR, + ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, + ERROR_WOULD_LOSE_UNTRACKED_REMOVED, + ERROR_BIND_OVERLAP, + ERROR_SPARSE_NOT_UPTODATE_FILE, + ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN, + ERROR_WOULD_LOSE_ORPHANED_REMOVED, + NB_UNPACK_TREES_ERROR_TYPES +}; + +/* + * Sets the list of user-friendly error messages to be used by the + * command "cmd" (either merge or checkout), and show_all_errors to 1. 
+ */ +void setup_unpack_trees_porcelain(struct unpack_trees_options *opts, + const char *cmd); + +struct rejected_paths_list { + char *path; + struct rejected_paths_list *next; }; struct unpack_trees_options { @@ -33,12 +48,18 @@ struct unpack_trees_options { diff_index_cached, debug_unpack, skip_sparse_checkout, - gently; + gently, + show_all_errors; const char *prefix; int cache_bottom; struct dir_struct *dir; merge_fn_t fn; - struct unpack_trees_error_msgs msgs; + const char *msgs[NB_UNPACK_TREES_ERROR_TYPES]; + /* + * Store error messages in an array, each case + * corresponding to a error message type + */ + struct rejected_paths_list *unpack_rejects[NB_UNPACK_TREES_ERROR_TYPES]; int head_idx; int merge_size; diff --git a/upload-pack.c b/upload-pack.c index dc464d78b3..92f9530c65 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -105,7 +105,7 @@ static void show_edge(struct commit *commit) fprintf(pack_pipe, "-%s\n", sha1_to_hex(commit->object.sha1)); } -static int do_rev_list(int in, int out, void *create_full_pack) +static int do_rev_list(int in, int out, void *user_data) { int i; struct rev_info revs; @@ -118,23 +118,18 @@ static int do_rev_list(int in, int out, void *create_full_pack) if (use_thin_pack) revs.edge_hint = 1; - if (create_full_pack) { - const char *args[] = {"rev-list", "--all", NULL}; - setup_revisions(2, args, &revs, NULL); - } else { - for (i = 0; i < want_obj.nr; i++) { - struct object *o = want_obj.objects[i].item; - /* why??? */ - o->flags &= ~UNINTERESTING; - add_pending_object(&revs, o, NULL); - } - for (i = 0; i < have_obj.nr; i++) { - struct object *o = have_obj.objects[i].item; - o->flags |= UNINTERESTING; - add_pending_object(&revs, o, NULL); - } - setup_revisions(0, NULL, &revs, NULL); + for (i = 0; i < want_obj.nr; i++) { + struct object *o = want_obj.objects[i].item; + /* why??? 
*/ + o->flags &= ~UNINTERESTING; + add_pending_object(&revs, o, NULL); + } + for (i = 0; i < have_obj.nr; i++) { + struct object *o = have_obj.objects[i].item; + o->flags |= UNINTERESTING; + add_pending_object(&revs, o, NULL); } + setup_revisions(0, NULL, &revs, NULL); if (prepare_revision_walk(&revs)) die("revision walk setup failed"); mark_edges_uninteresting(revs.commits, &revs, show_edge); @@ -487,7 +482,7 @@ static int get_common_commits(void) static void receive_needs(void) { - struct object_array shallows = {0, 0, NULL}; + struct object_array shallows = OBJECT_ARRAY_INIT; static char line[1000]; int len, depth = 0; @@ -554,7 +549,8 @@ static void receive_needs(void) */ o = lookup_object(sha1_buf); if (!o || !(o->flags & OUR_REF)) - die("git upload-pack: not our ref %s", line+5); + die("git upload-pack: not our ref %s", + sha1_to_hex(sha1_buf)); if (!(o->flags & WANTED)) { o->flags |= WANTED; add_object_array(o, NULL, &want_obj); @@ -67,7 +67,8 @@ static int url_decode_char(const char *q) return val; } -static char *url_decode_internal(const char **query, const char *stop_at, struct strbuf *out) +static char *url_decode_internal(const char **query, const char *stop_at, + struct strbuf *out, int decode_plus) { const char *q = *query; @@ -90,7 +91,7 @@ static char *url_decode_internal(const char **query, const char *stop_at, struct } } - if (c == '+') + if (decode_plus && c == '+') strbuf_addch(out, ' '); else strbuf_addch(out, c); @@ -110,17 +111,17 @@ char *url_decode(const char *url) strbuf_add(&out, url, colon - url); url = colon; } - return url_decode_internal(&url, NULL, &out); + return url_decode_internal(&url, NULL, &out, 0); } char *url_decode_parameter_name(const char **query) { struct strbuf out = STRBUF_INIT; - return url_decode_internal(query, "&=", &out); + return url_decode_internal(query, "&=", &out, 1); } char *url_decode_parameter_value(const char **query) { struct strbuf out = STRBUF_INIT; - return url_decode_internal(query, "&", &out); + 
return url_decode_internal(query, "&", &out, 1); } diff --git a/userdiff.c b/userdiff.c index c49cc1b67e..e5522159b3 100644 --- a/userdiff.c +++ b/userdiff.c @@ -82,6 +82,22 @@ PATTERNS("cpp", "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("csharp", + /* Keywords */ + "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" + /* Methods and constructors */ + "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" + /* Properties */ + "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + /* Type definitions */ + "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct)[ \t]+.*)$\n" + /* Namespace */ + "^[ \t]*(namespace[ \t]+.*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), { "default", NULL, -1, { NULL, 0 } }, }; #undef PATTERNS diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE new file mode 100644 index 0000000000..0a5e3c43a0 --- /dev/null +++ b/vcs-svn/LICENSE @@ -0,0 +1,33 @@ +Copyright (C) 2010 David Barr <david.barr@cordelta.com>. +All rights reserved. + +Copyright (C) 2008 Jason Evans <jasone@canonware.com>. +All rights reserved. + +Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH, +Frankfurt/Main, Germany +and others, see http://svn2cc.sarovar.org + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. 
Redistributions of source code must retain the above copyright + notice(s), this list of conditions and the following disclaimer + unmodified other than the allowable addition of one or more + copyright notices. +2. Redistributions in binary form must reproduce the above copyright + notice(s), this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c new file mode 100644 index 0000000000..256a0522b2 --- /dev/null +++ b/vcs-svn/fast_export.c @@ -0,0 +1,75 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "repo_tree.h" +#include "string_pool.h" + +#define MAX_GITSVN_LINE_LEN 4096 + +static uint32_t first_commit_done; + +void fast_export_delete(uint32_t depth, uint32_t *path) +{ + putchar('D'); + putchar(' '); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark) +{ + /* Mode must be 100644, 100755, 120000, or 160000. 
*/ + printf("M %06o :%d ", mode, mark); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +static char gitsvnline[MAX_GITSVN_LINE_LEN]; +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, + unsigned long timestamp) +{ + if (!log) + log = ""; + if (~uuid && ~url) { + snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n", + pool_fetch(url), revision, pool_fetch(uuid)); + } else { + *gitsvnline = '\0'; + } + printf("commit refs/heads/master\n"); + printf("committer %s <%s@%s> %ld +0000\n", + ~author ? pool_fetch(author) : "nobody", + ~author ? pool_fetch(author) : "nobody", + ~uuid ? pool_fetch(uuid) : "local", timestamp); + printf("data %"PRIu32"\n%s%s\n", + (uint32_t) (strlen(log) + strlen(gitsvnline)), + log, gitsvnline); + if (!first_commit_done) { + if (revision > 1) + printf("from refs/heads/master^0\n"); + first_commit_done = 1; + } + repo_diff(revision - 1, revision); + fputc('\n', stdout); + + printf("progress Imported commit %d.\n\n", revision); +} + +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +{ + if (mode == REPO_MODE_LNK) { + /* svn symlink blobs start with "link " */ + buffer_skip_bytes(5); + len -= 5; + } + printf("blob\nmark :%d\ndata %d\n", mark, len); + buffer_copy_bytes(len); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h new file mode 100644 index 0000000000..2aaaea53d5 --- /dev/null +++ b/vcs-svn/fast_export.h @@ -0,0 +1,11 @@ +#ifndef FAST_EXPORT_H_ +#define FAST_EXPORT_H_ + +void fast_export_delete(uint32_t depth, uint32_t *path); +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark); +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, unsigned long timestamp); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); + +#endif diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c new file mode 100644 
index 0000000000..1543567093 --- /dev/null +++ b/vcs-svn/line_buffer.c @@ -0,0 +1,97 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "line_buffer.h" +#include "obj_pool.h" + +#define LINE_BUFFER_LEN 10000 +#define COPY_BUFFER_LEN 4096 + +/* Create memory pool for char sequence of known length */ +obj_pool_gen(blob, char, 4096) + +static char line_buffer[LINE_BUFFER_LEN]; +static char byte_buffer[COPY_BUFFER_LEN]; +static FILE *infile; + +int buffer_init(const char *filename) +{ + infile = filename ? fopen(filename, "r") : stdin; + if (!infile) + return -1; + return 0; +} + +int buffer_deinit(void) +{ + int err; + if (infile == stdin) + return ferror(infile); + err = ferror(infile); + err |= fclose(infile); + return err; +} + +/* Read a line without trailing newline. */ +char *buffer_read_line(void) +{ + char *end; + if (!fgets(line_buffer, sizeof(line_buffer), infile)) + /* Error or data exhausted. */ + return NULL; + end = line_buffer + strlen(line_buffer); + if (end[-1] == '\n') + end[-1] = '\0'; + else if (feof(infile)) + ; /* No newline at end of file. That's fine. */ + else + /* + * Line was too long. + * There is probably a saner way to deal with this, + * but for now let's return an error. + */ + return NULL; + return line_buffer; +} + +char *buffer_read_string(uint32_t len) +{ + char *s; + blob_free(blob_pool.size); + s = blob_pointer(blob_alloc(len + 1)); + s[fread(s, 1, len, infile)] = '\0'; + return ferror(infile) ? NULL : s; +} + +void buffer_copy_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? 
len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + fwrite(byte_buffer, 1, in, stdout); + if (ferror(stdout)) { + buffer_skip_bytes(len); + return; + } + } +} + +void buffer_skip_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + } +} + +void buffer_reset(void) +{ + blob_reset(); +} diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h new file mode 100644 index 0000000000..9c78ae11a1 --- /dev/null +++ b/vcs-svn/line_buffer.h @@ -0,0 +1,12 @@ +#ifndef LINE_BUFFER_H_ +#define LINE_BUFFER_H_ + +int buffer_init(const char *filename); +int buffer_deinit(void); +char *buffer_read_line(void); +char *buffer_read_string(uint32_t len); +void buffer_copy_bytes(uint32_t len); +void buffer_skip_bytes(uint32_t len); +void buffer_reset(void); + +#endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt new file mode 100644 index 0000000000..8906fb1f50 --- /dev/null +++ b/vcs-svn/line_buffer.txt @@ -0,0 +1,58 @@ +line_buffer API +=============== + +The line_buffer library provides a convenient interface for +mostly-line-oriented input. + +Each line is not permitted to exceed 10000 bytes. The provided +functions are not thread-safe or async-signal-safe, and like +`fgets()`, they generally do not function correctly if interrupted +by a signal without SA_RESTART set. + +Calling sequence +---------------- + +The calling program: + + - specifies a file to read with `buffer_init` + - processes input with `buffer_read_line`, `buffer_read_string`, + `buffer_skip_bytes`, and `buffer_copy_bytes` + - closes the file with `buffer_deinit`, perhaps to start over and + read another file. + +Before exiting, the caller can use `buffer_reset` to deallocate +resources for the benefit of profiling tools. + +Functions +--------- + +`buffer_init`:: + Open the named file for input. 
If filename is NULL, + start reading from stdin. On failure, returns -1 (with + errno indicating the nature of the failure). + +`buffer_deinit`:: + Stop reading from the current file (closing it unless + it was stdin). Returns nonzero if `fclose` fails or + the error indicator was set. + +`buffer_read_line`:: + Read a line and strip off the trailing newline. + On failure or end of file, returns NULL. + +`buffer_read_string`:: + Read `len` characters of input or up to the end of the + file, whichever comes first. Returns NULL on error. + Returns whatever characters were read (possibly "") + for end of file. + +`buffer_copy_bytes`:: + Read `len` bytes of input and dump them to the standard output + stream. Returns early for error or end of file. + +`buffer_skip_bytes`:: + Discards `len` bytes from the input stream (stopping early + if necessary because of an error or eof). + +`buffer_reset`:: + Deallocates non-static buffers. diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h new file mode 100644 index 0000000000..deb6eb8135 --- /dev/null +++ b/vcs-svn/obj_pool.h @@ -0,0 +1,61 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#ifndef OBJ_POOL_H_ +#define OBJ_POOL_H_ + +#include "git-compat-util.h" + +#define MAYBE_UNUSED __attribute__((__unused__)) + +#define obj_pool_gen(pre, obj_t, initial_capacity) \ +static struct { \ + uint32_t committed; \ + uint32_t size; \ + uint32_t capacity; \ + obj_t *base; \ +} pre##_pool = {0, 0, 0, NULL}; \ +static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \ +{ \ + uint32_t offset; \ + if (pre##_pool.size + count > pre##_pool.capacity) { \ + while (pre##_pool.size + count > pre##_pool.capacity) \ + if (pre##_pool.capacity) \ + pre##_pool.capacity *= 2; \ + else \ + pre##_pool.capacity = initial_capacity; \ + pre##_pool.base = realloc(pre##_pool.base, \ + pre##_pool.capacity * sizeof(obj_t)); \ + } \ + offset = pre##_pool.size; \ + pre##_pool.size += count; \ + return offset; \ +} \ +static MAYBE_UNUSED void pre##_free(uint32_t count) \ +{ \ + pre##_pool.size -= count; \ +} \ +static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \ +{ \ + return obj == NULL ? ~0 : obj - pre##_pool.base; \ +} \ +static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \ +{ \ + return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \ +} \ +static MAYBE_UNUSED void pre##_commit(void) \ +{ \ + pre##_pool.committed = pre##_pool.size; \ +} \ +static MAYBE_UNUSED void pre##_reset(void) \ +{ \ + free(pre##_pool.base); \ + pre##_pool.base = NULL; \ + pre##_pool.size = 0; \ + pre##_pool.capacity = 0; \ + pre##_pool.committed = 0; \ +} + +#endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c new file mode 100644 index 0000000000..e94d91d129 --- /dev/null +++ b/vcs-svn/repo_tree.c @@ -0,0 +1,329 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#include "git-compat-util.h" + +#include "string_pool.h" +#include "repo_tree.h" +#include "obj_pool.h" +#include "fast_export.h" + +#include "trp.h" + +struct repo_dirent { + uint32_t name_offset; + struct trp_node children; + uint32_t mode; + uint32_t content_offset; +}; + +struct repo_dir { + struct trp_root entries; +}; + +struct repo_commit { + uint32_t root_dir_offset; +}; + +/* Memory pools for commit, dir and dirent */ +obj_pool_gen(commit, struct repo_commit, 4096) +obj_pool_gen(dir, struct repo_dir, 4096) +obj_pool_gen(dent, struct repo_dirent, 4096) + +static uint32_t active_commit; +static uint32_t mark; + +static int repo_dirent_name_cmp(const void *a, const void *b); + +/* Treap for directory entries */ +trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp); + +uint32_t next_blob_mark(void) +{ + return mark++; +} + +static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) +{ + return dir_pointer(commit->root_dir_offset); +} + +static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) +{ + return dent_first(&dir->entries); +} + +static int repo_dirent_name_cmp(const void *a, const void *b) +{ + const struct repo_dirent *dent1 = a, *dent2 = b; + uint32_t a_offset = dent1->name_offset; + uint32_t b_offset = dent2->name_offset; + return (a_offset > b_offset) - (a_offset < b_offset); +} + +static int repo_dirent_is_dir(struct repo_dirent *dent) +{ + return dent != NULL && dent->mode == REPO_MODE_DIR; +} + +static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent) +{ + if (!repo_dirent_is_dir(dent)) + return NULL; + return dir_pointer(dent->content_offset); +} + +static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) +{ + uint32_t orig_o, new_o; + orig_o = dir_offset(orig_dir); + if (orig_o >= dir_pool.committed) + return orig_dir; + new_o = dir_alloc(1); + orig_dir = dir_pointer(orig_o); + *dir_pointer(new_o) = *orig_dir; + return dir_pointer(new_o); +} + +static struct 
repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) +{ + uint32_t name = 0; + struct repo_dirent *key = dent_pointer(dent_alloc(1)); + struct repo_dir *dir = NULL; + struct repo_dirent *dent = NULL; + dir = repo_commit_root_dir(commit_pointer(revision)); + while (~(name = *path++)) { + key->name_offset = name; + dent = dent_search(&dir->entries, key); + if (dent == NULL || !repo_dirent_is_dir(dent)) + break; + dir = repo_dir_from_dirent(dent); + } + dent_free(1); + return dent; +} + +static void repo_write_dirent(uint32_t *path, uint32_t mode, + uint32_t content_offset, uint32_t del) +{ + uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; + struct repo_dir *dir; + struct repo_dirent *key; + struct repo_dirent *dent = NULL; + revision = active_commit; + dir = repo_commit_root_dir(commit_pointer(revision)); + dir = repo_clone_dir(dir); + commit_pointer(revision)->root_dir_offset = dir_offset(dir); + while (~(name = *path++)) { + parent_dir_o = dir_offset(dir); + + key = dent_pointer(dent_alloc(1)); + key->name_offset = name; + + dent = dent_search(&dir->entries, key); + if (dent == NULL) + dent = key; + else + dent_free(1); + + if (dent == key) { + dent->mode = REPO_MODE_DIR; + dent->content_offset = 0; + dent_insert(&dir->entries, dent); + } + + if (dent_offset(dent) < dent_pool.committed) { + dir_o = repo_dirent_is_dir(dent) ? 
+ dent->content_offset : ~0; + dent_remove(&dir->entries, dent); + dent = dent_pointer(dent_alloc(1)); + dent->name_offset = name; + dent->mode = REPO_MODE_DIR; + dent->content_offset = dir_o; + dent_insert(&dir->entries, dent); + } + + dir = repo_dir_from_dirent(dent); + dir = repo_clone_dir(dir); + dent->content_offset = dir_offset(dir); + } + if (dent == NULL) + return; + dent->mode = mode; + dent->content_offset = content_offset; + if (del && ~parent_dir_o) + dent_remove(&dir_pointer(parent_dir_o)->entries, dent); +} + +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) +{ + uint32_t mode = 0, content_offset = 0; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(revision, src); + if (src_dent != NULL) { + mode = src_dent->mode; + content_offset = src_dent->content_offset; + repo_write_dirent(dst, mode, content_offset, 0); + } + return mode; +} + +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + repo_write_dirent(path, mode, blob_mark, 0); +} + +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) +{ + uint32_t mode = 0; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL) { + mode = src_dent->mode; + repo_write_dirent(path, mode, blob_mark, 0); + } + return mode; +} + +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL && blob_mark == 0) + blob_mark = src_dent->content_offset; + repo_write_dirent(path, mode, blob_mark, 0); +} + +void repo_delete(uint32_t *path) +{ + repo_write_dirent(path, 0, 0, 1); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); + +static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent) +{ + if (repo_dirent_is_dir(dent)) + repo_git_add_r(depth, path, repo_dir_from_dirent(dent)); + else + fast_export_modify(depth, path, + dent->mode, 
dent->content_offset); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) +{ + struct repo_dirent *de = repo_first_dirent(dir); + while (de) { + path[depth] = de->name_offset; + repo_git_add(depth + 1, path, de); + de = dent_next(&dir->entries, de); + } +} + +static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, + struct repo_dir *dir2) +{ + struct repo_dirent *de1, *de2; + de1 = repo_first_dirent(dir1); + de2 = repo_first_dirent(dir2); + + while (de1 && de2) { + if (de1->name_offset < de2->name_offset) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dent_next(&dir1->entries, de1); + continue; + } + if (de1->name_offset > de2->name_offset) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dent_next(&dir2->entries, de2); + continue; + } + path[depth] = de1->name_offset; + + if (de1->mode == de2->mode && + de1->content_offset == de2->content_offset) { + ; /* No change. 
*/ + } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { + repo_diff_r(depth + 1, path, + repo_dir_from_dirent(de1), + repo_dir_from_dirent(de2)); + } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) { + repo_git_add(depth + 1, path, de2); + } else { + fast_export_delete(depth + 1, path); + repo_git_add(depth + 1, path, de2); + } + de1 = dent_next(&dir1->entries, de1); + de2 = dent_next(&dir2->entries, de2); + } + while (de1) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dent_next(&dir1->entries, de1); + } + while (de2) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dent_next(&dir2->entries, de2); + } +} + +static uint32_t path_stack[REPO_MAX_PATH_DEPTH]; + +void repo_diff(uint32_t r1, uint32_t r2) +{ + repo_diff_r(0, + path_stack, + repo_commit_root_dir(commit_pointer(r1)), + repo_commit_root_dir(commit_pointer(r2))); +} + +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, unsigned long timestamp) +{ + fast_export_commit(revision, author, log, uuid, url, timestamp); + dent_commit(); + dir_commit(); + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +static void mark_init(void) +{ + uint32_t i; + mark = 0; + for (i = 0; i < dent_pool.size; i++) + if (!repo_dirent_is_dir(dent_pointer(i)) && + dent_pointer(i)->content_offset > mark) + mark = dent_pointer(i)->content_offset; + mark++; +} + +void repo_init(void) +{ + mark_init(); + if (commit_pool.size == 0) { + /* Create empty tree for commit 0. */ + commit_alloc(1); + commit_pointer(0)->root_dir_offset = dir_alloc(1); + dir_pointer(0)->entries.trp_root = ~0; + dir_commit(); + } + /* Preallocate next commit, ready for changes. 
*/ + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +void repo_reset(void) +{ + pool_reset(); + commit_reset(); + dir_reset(); + dent_reset(); +} diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h new file mode 100644 index 0000000000..5476175922 --- /dev/null +++ b/vcs-svn/repo_tree.h @@ -0,0 +1,26 @@ +#ifndef REPO_TREE_H_ +#define REPO_TREE_H_ + +#include "git-compat-util.h" + +#define REPO_MODE_DIR 0040000 +#define REPO_MODE_BLB 0100644 +#define REPO_MODE_EXE 0100755 +#define REPO_MODE_LNK 0120000 + +#define REPO_MAX_PATH_LEN 4096 +#define REPO_MAX_PATH_DEPTH 1000 + +uint32_t next_blob_mark(void); +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); +void repo_delete(uint32_t *path); +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, long unsigned timestamp); +void repo_diff(uint32_t r1, uint32_t r2); +void repo_init(void); +void repo_reset(void); + +#endif diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c new file mode 100644 index 0000000000..f5b1da836e --- /dev/null +++ b/vcs-svn/string_pool.c @@ -0,0 +1,102 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "trp.h" +#include "obj_pool.h" +#include "string_pool.h" + +static struct trp_root tree = { ~0 }; + +struct node { + uint32_t offset; + struct trp_node children; +}; + +/* Two memory pools: one for struct node, and another for strings */ +obj_pool_gen(node, struct node, 4096) +obj_pool_gen(string, char, 4096) + +static char *node_value(struct node *node) +{ + return node ? 
string_pointer(node->offset) : NULL; +} + +static int node_cmp(struct node *a, struct node *b) +{ + return strcmp(node_value(a), node_value(b)); +} + +/* Build a Treap from the node structure (a trp_node w/ offset) */ +trp_gen(static, tree_, struct node, children, node, node_cmp); + +const char *pool_fetch(uint32_t entry) +{ + return node_value(node_pointer(entry)); +} + +uint32_t pool_intern(const char *key) +{ + /* Canonicalize key */ + struct node *match = NULL, *node; + uint32_t key_len; + if (key == NULL) + return ~0; + key_len = strlen(key) + 1; + node = node_pointer(node_alloc(1)); + node->offset = string_alloc(key_len); + strcpy(node_value(node), key); + match = tree_search(&tree, node); + if (!match) { + tree_insert(&tree, node); + } else { + node_free(1); + string_free(key_len); + node = match; + } + return node_offset(node); +} + +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) +{ + char *token = strtok_r(str, delim, saveptr); + return token ? pool_intern(token) : ~0; +} + +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) +{ + uint32_t i; + for (i = 0; i < len && ~seq[i]; i++) { + fputs(pool_fetch(seq[i]), stream); + if (i < len - 1 && ~seq[i + 1]) + fputc(delim, stream); + } +} + +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) +{ + char *context = NULL; + uint32_t token = ~0; + uint32_t length; + + if (sz == 0) + return ~0; + if (str) + token = pool_tok_r(str, delim, &context); + for (length = 0; length < sz; length++) { + seq[length] = token; + if (token == ~0) + return length; + token = pool_tok_r(NULL, delim, &context); + } + seq[sz - 1] = ~0; + return sz; +} + +void pool_reset(void) +{ + node_reset(); + string_reset(); +} diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h new file mode 100644 index 0000000000..222fb66e68 --- /dev/null +++ b/vcs-svn/string_pool.h @@ -0,0 +1,11 @@ +#ifndef STRING_POOL_H_ +#define STRING_POOL_H_ + +uint32_t pool_intern(const char 
*key); +const char *pool_fetch(uint32_t entry); +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); +void pool_reset(void); + +#endif diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt new file mode 100644 index 0000000000..1b41f15628 --- /dev/null +++ b/vcs-svn/string_pool.txt @@ -0,0 +1,43 @@ +string_pool API +=============== + +The string_pool API provides facilities for replacing strings +with integer keys that can be more easily compared and stored. +The facilities are designed so that one could teach Git without +too much trouble to store the information needed for these keys to +remain valid over multiple executions. + +Functions +--------- + +pool_intern:: + Include a string in the string pool and get its key. + If that string is already in the pool, retrieves its + existing key. + +pool_fetch:: + Retrieve the string associated to a given key. + +pool_tok_r:: + Extract the key of the next token from a string. + Interface mimics strtok_r. + +pool_print_seq:: + Print a sequence of strings named by key to a file, using the + specified delimiter to separate them. + + If NULL (key ~0) appears in the sequence, the sequence ends + early. + +pool_tok_seq:: + Split a string into tokens, storing the keys of segments + into a caller-provided array. + + Unless sz is 0, the array will always be ~0-terminated. + If there is not enough room for all the tokens, the + array holds as many tokens as fit in the entries before + the terminating ~0. Return value is the index after the + last token, or sz if the tokens did not fit. + +pool_reset:: + Deallocate storage for the string pool. diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c new file mode 100644 index 0000000000..630eeb53b7 --- /dev/null +++ b/vcs-svn/svndump.c @@ -0,0 +1,302 @@ +/* + * Parse and rearrange a svnadmin dump. 
+ * Create the dump with: + * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "cache.h" +#include "repo_tree.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "obj_pool.h" +#include "string_pool.h" + +#define NODEACT_REPLACE 4 +#define NODEACT_DELETE 3 +#define NODEACT_ADD 2 +#define NODEACT_CHANGE 1 +#define NODEACT_UNKNOWN 0 + +#define DUMP_CTX 0 +#define REV_CTX 1 +#define NODE_CTX 2 + +#define LENGTH_UNKNOWN (~0) +#define DATE_RFC2822_LEN 31 + +/* Create memory pool for log messages */ +obj_pool_gen(log, char, 4096) + +static char* log_copy(uint32_t length, char *log) +{ + char *buffer; + log_free(log_pool.size); + buffer = log_pointer(log_alloc(length)); + strncpy(buffer, log, length); + return buffer; +} + +static struct { + uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; + uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; +} node_ctx; + +static struct { + uint32_t revision, author; + unsigned long timestamp; + char *log; +} rev_ctx; + +static struct { + uint32_t uuid, url; +} dump_ctx; + +static struct { + uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, + revision_number, node_path, node_kind, node_action, + node_copyfrom_path, node_copyfrom_rev, text_content_length, + prop_content_length, content_length; +} keys; + +static void reset_node_ctx(char *fname) +{ + node_ctx.type = 0; + node_ctx.action = NODEACT_UNKNOWN; + node_ctx.propLength = LENGTH_UNKNOWN; + node_ctx.textLength = LENGTH_UNKNOWN; + node_ctx.src[0] = ~0; + node_ctx.srcRev = 0; + node_ctx.srcMode = 0; + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); + node_ctx.mark = 0; +} + +static void reset_rev_ctx(uint32_t revision) +{ + rev_ctx.revision = revision; + rev_ctx.timestamp = 0; + rev_ctx.log = NULL; + rev_ctx.author = ~0; +} + +static void reset_dump_ctx(uint32_t url) +{ + dump_ctx.url 
= url; + dump_ctx.uuid = ~0; +} + +static void init_keys(void) +{ + keys.svn_log = pool_intern("svn:log"); + keys.svn_author = pool_intern("svn:author"); + keys.svn_date = pool_intern("svn:date"); + keys.svn_executable = pool_intern("svn:executable"); + keys.svn_special = pool_intern("svn:special"); + keys.uuid = pool_intern("UUID"); + keys.revision_number = pool_intern("Revision-number"); + keys.node_path = pool_intern("Node-path"); + keys.node_kind = pool_intern("Node-kind"); + keys.node_action = pool_intern("Node-action"); + keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); + keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); + keys.text_content_length = pool_intern("Text-content-length"); + keys.prop_content_length = pool_intern("Prop-content-length"); + keys.content_length = pool_intern("Content-length"); +} + +static void read_props(void) +{ + uint32_t len; + uint32_t key = ~0; + char *val = NULL; + char *t; + while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { + if (!strncmp(t, "K ", 2)) { + len = atoi(&t[2]); + key = pool_intern(buffer_read_string(len)); + buffer_read_line(); + } else if (!strncmp(t, "V ", 2)) { + len = atoi(&t[2]); + val = buffer_read_string(len); + if (key == keys.svn_log) { + /* Value length excludes terminating nul. 
*/ + rev_ctx.log = log_copy(len + 1, val); + } else if (key == keys.svn_author) { + rev_ctx.author = pool_intern(val); + } else if (key == keys.svn_date) { + if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) + fprintf(stderr, "Invalid timestamp: %s\n", val); + } else if (key == keys.svn_executable) { + node_ctx.type = REPO_MODE_EXE; + } else if (key == keys.svn_special) { + node_ctx.type = REPO_MODE_LNK; + } + key = ~0; + buffer_read_line(); + } + } +} + +static void handle_node(void) +{ + if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) + read_props(); + + if (node_ctx.srcRev) + node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + + if (node_ctx.textLength != LENGTH_UNKNOWN && + node_ctx.type != REPO_MODE_DIR) + node_ctx.mark = next_blob_mark(); + + if (node_ctx.action == NODEACT_DELETE) { + repo_delete(node_ctx.dst); + } else if (node_ctx.action == NODEACT_CHANGE || + node_ctx.action == NODEACT_REPLACE) { + if (node_ctx.action == NODEACT_REPLACE && + node_ctx.type == REPO_MODE_DIR) + repo_replace(node_ctx.dst, node_ctx.mark); + else if (node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + } else if (node_ctx.action == NODEACT_ADD) { + if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || + node_ctx.textLength != LENGTH_UNKNOWN) + repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + } + + if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) + node_ctx.type = node_ctx.srcMode; + + if (node_ctx.mark) + fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); + else if 
(node_ctx.textLength != LENGTH_UNKNOWN) + buffer_skip_bytes(node_ctx.textLength); +} + +static void handle_revision(void) +{ + if (rev_ctx.revision) + repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, + dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); +} + +void svndump_read(const char *url) +{ + char *val; + char *t; + uint32_t active_ctx = DUMP_CTX; + uint32_t len; + uint32_t key; + + reset_dump_ctx(pool_intern(url)); + while ((t = buffer_read_line())) { + val = strstr(t, ": "); + if (!val) + continue; + *val++ = '\0'; + *val++ = '\0'; + key = pool_intern(t); + + if (key == keys.uuid) { + dump_ctx.uuid = pool_intern(val); + } else if (key == keys.revision_number) { + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); + active_ctx = REV_CTX; + reset_rev_ctx(atoi(val)); + } else if (key == keys.node_path) { + if (active_ctx == NODE_CTX) + handle_node(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + } else if (key == keys.node_kind) { + if (!strcmp(val, "dir")) + node_ctx.type = REPO_MODE_DIR; + else if (!strcmp(val, "file")) + node_ctx.type = REPO_MODE_BLB; + else + fprintf(stderr, "Unknown node-kind: %s\n", val); + } else if (key == keys.node_action) { + if (!strcmp(val, "delete")) { + node_ctx.action = NODEACT_DELETE; + } else if (!strcmp(val, "add")) { + node_ctx.action = NODEACT_ADD; + } else if (!strcmp(val, "change")) { + node_ctx.action = NODEACT_CHANGE; + } else if (!strcmp(val, "replace")) { + node_ctx.action = NODEACT_REPLACE; + } else { + fprintf(stderr, "Unknown node-action: %s\n", val); + node_ctx.action = NODEACT_UNKNOWN; + } + } else if (key == keys.node_copyfrom_path) { + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); + } else if (key == keys.node_copyfrom_rev) { + node_ctx.srcRev = atoi(val); + } else if (key == keys.text_content_length) { + node_ctx.textLength = atoi(val); + } else if (key == keys.prop_content_length) { + node_ctx.propLength = atoi(val); + } else if (key == 
keys.content_length) { + len = atoi(val); + buffer_read_line(); + if (active_ctx == REV_CTX) { + read_props(); + } else if (active_ctx == NODE_CTX) { + handle_node(); + active_ctx = REV_CTX; + } else { + fprintf(stderr, "Unexpected content length header: %d\n", len); + buffer_skip_bytes(len); + } + } + } + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); +} + +void svndump_init(const char *filename) +{ + buffer_init(filename); + repo_init(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + init_keys(); +} + +void svndump_deinit(void) +{ + log_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + if (buffer_deinit()) + fprintf(stderr, "Input error\n"); + if (ferror(stdout)) + fprintf(stderr, "Output error\n"); +} + +void svndump_reset(void) +{ + log_reset(); + buffer_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); +} diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h new file mode 100644 index 0000000000..93c412f14a --- /dev/null +++ b/vcs-svn/svndump.h @@ -0,0 +1,9 @@ +#ifndef SVNDUMP_H_ +#define SVNDUMP_H_ + +void svndump_init(const char *filename); +void svndump_read(const char *url); +void svndump_deinit(void); +void svndump_reset(void); + +#endif diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h new file mode 100644 index 0000000000..ee35c688a0 --- /dev/null +++ b/vcs-svn/trp.h @@ -0,0 +1,236 @@ +/* + * C macro implementation of treaps. + * + * Usage: + * #include <stdint.h> + * #include "trp.h" + * trp_gen(...) + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef TRP_H_ +#define TRP_H_ + +#define MAYBE_UNUSED __attribute__((__unused__)) + +/* Node structure. */ +struct trp_node { + uint32_t trpn_left; + uint32_t trpn_right; +}; + +/* Root structure. */ +struct trp_root { + uint32_t trp_root; +}; + +/* Pointer/Offset conversion. 
*/ +#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) +#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) +#define trpn_modify(a_base, a_offset) \ + do { \ + if ((a_offset) < a_base##_pool.committed) { \ + uint32_t old_offset = (a_offset);\ + (a_offset) = a_base##_alloc(1); \ + *trpn_pointer(a_base, a_offset) = \ + *trpn_pointer(a_base, old_offset); \ + } \ + } while (0) + +/* Left accessors. */ +#define trp_left_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_left) +#define trp_left_set(a_base, a_field, a_node, a_left) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_left_get(a_base, a_field, a_node) = (a_left); \ + } while (0) + +/* Right accessors. */ +#define trp_right_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_right) +#define trp_right_set(a_base, a_field, a_node, a_right) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_right_get(a_base, a_field, a_node) = (a_right); \ + } while (0) + +/* + * Fibonacci hash function. + * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2). + * See Knuth §6.4: volume 3, 3rd ed, p518. + */ +#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node)) + +/* Priority accessors. */ +#define trp_prio_get(a_node) trpn_hash(a_node) + +/* Node initializer. */ +#define trp_node_new(a_base, a_field, a_node) \ + do { \ + trp_left_set(a_base, a_field, (a_node), ~0); \ + trp_right_set(a_base, a_field, (a_node), ~0); \ + } while (0) + +/* Internal utility macros. 
*/ +#define trpn_first(a_base, a_field, a_root, r_node) \ + do { \ + (r_node) = (a_root); \ + if ((r_node) == ~0) \ + return NULL; \ + while (~trp_left_get(a_base, a_field, (r_node))) \ + (r_node) = trp_left_get(a_base, a_field, (r_node)); \ + } while (0) + +#define trpn_rotate_left(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_right_get(a_base, a_field, (a_node)); \ + trp_right_set(a_base, a_field, (a_node), \ + trp_left_get(a_base, a_field, (r_node))); \ + trp_left_set(a_base, a_field, (r_node), (a_node)); \ + } while (0) + +#define trpn_rotate_right(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_left_get(a_base, a_field, (a_node)); \ + trp_left_set(a_base, a_field, (a_node), \ + trp_right_get(a_base, a_field, (r_node))); \ + trp_right_set(a_base, a_field, (r_node), (a_node)); \ + } while (0) + +#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ +a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ +{ \ + uint32_t ret; \ + trpn_first(a_base, a_field, treap->trp_root, ret); \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t ret; \ + uint32_t offset = trpn_offset(a_base, node); \ + if (~trp_right_get(a_base, a_field, offset)) { \ + trpn_first(a_base, a_field, \ + trp_right_get(a_base, a_field, offset), ret); \ + } else { \ + uint32_t tnode = treap->trp_root; \ + ret = ~0; \ + while (1) { \ + int cmp = (a_cmp)(trpn_pointer(a_base, offset), \ + trpn_pointer(a_base, tnode)); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = trp_left_get(a_base, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = trp_right_get(a_base, a_field, tnode); \ + } else { \ + break; \ + } \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ 
+ if (cmp < 0) { \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ + if (cmp < 0) { \ + if (!~trp_left_get(a_base, a_field, ret)) \ + break; \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ +{ \ + if (cur_node == ~0) { \ + return ins_node; \ + } else { \ + uint32_t ret; \ + int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp < 0) { \ + uint32_t left = a_pre##insert_recurse( \ + trp_left_get(a_base, a_field, cur_node), ins_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + if (trp_prio_get(left) < trp_prio_get(cur_node)) \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } else { \ + uint32_t right = a_pre##insert_recurse( \ + trp_right_get(a_base, a_field, cur_node), ins_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + if (trp_prio_get(right) < trp_prio_get(cur_node)) \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } \ + return ret; \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t offset = trpn_offset(a_base, node); \ + trp_node_new(a_base, a_field, offset); \ + treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ +{ \ + int cmp = a_cmp(trpn_pointer(a_base, rem_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp == 0) { \ + 
uint32_t ret; \ + uint32_t left = trp_left_get(a_base, a_field, cur_node); \ + uint32_t right = trp_right_get(a_base, a_field, cur_node); \ + if (left == ~0) { \ + if (right == ~0) \ + return ~0; \ + } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + right = a_pre##remove_recurse(cur_node, rem_node); \ + trp_right_set(a_base, a_field, ret, right); \ + return ret; \ + } \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + left = a_pre##remove_recurse(cur_node, rem_node); \ + trp_left_set(a_base, a_field, ret, left); \ + return ret; \ + } else if (cmp < 0) { \ + uint32_t left = a_pre##remove_recurse( \ + trp_left_get(a_base, a_field, cur_node), rem_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + return cur_node; \ + } else { \ + uint32_t right = a_pre##remove_recurse( \ + trp_right_get(a_base, a_field, cur_node), rem_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + return cur_node; \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ +{ \ + treap->trp_root = a_pre##remove_recurse(treap->trp_root, \ + trpn_offset(a_base, node)); \ +} \ + +#endif diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt new file mode 100644 index 0000000000..eb4c191875 --- /dev/null +++ b/vcs-svn/trp.txt @@ -0,0 +1,103 @@ +Motivation +========== + +Treaps provide a memory-efficient binary search tree structure. +Insertion/deletion/search are about as fast in the average +case as red-black trees and the chances of worst-case behavior are +vanishingly small, thanks to (pseudo-)randomness. The bad worst-case +behavior is a small price to pay, given that treaps are much simpler +to implement. + +API +=== + +The trp API generates a data structure and functions to handle a +large growing set of objects stored in a pool. + +The caller: + +. Specifies parameters for the generated functions with the + trp_gen(static, foo_, ...) macro. + +. 
Allocates a `struct trp_root` variable and sets it to {~0}. + +. Adds new nodes to the set using `foo_insert`. + +. Can find a specific item in the set using `foo_search`. + +. Can iterate over items in the set using `foo_first` and `foo_next`. + +. Can remove an item from the set using `foo_remove`. + +Example: + +---- +struct ex_node { + const char *s; + struct trp_node ex_link; +}; +static struct trp_root ex_base = {~0}; +obj_pool_gen(ex, struct ex_node, 4096); +trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) +struct ex_node *item; + +item = ex_pointer(ex_alloc(1)); +item->s = "hello"; +ex_insert(&ex_base, item); +item = ex_pointer(ex_alloc(1)); +item->s = "goodbye"; +ex_insert(&ex_base, item); +for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) + printf("%s\n", item->s); +---- + +Functions +--------- + +trp_gen(attr, foo_, node_type, link_field, pool, cmp):: + + Generate a type-specific treap implementation. ++ +. The storage class for generated functions will be 'attr' (e.g., `static`). +. Generated function names are prefixed with 'foo_' (e.g., `treap_`). +. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). + This type must be a struct with at least one `struct trp_node` field + to point to its children. +. The field used to access child nodes will be 'link_field'. +. All treap nodes must lie in the 'pool' object pool. +. Treap nodes must be totally ordered by the 'cmp' relation, with the + following prototype: ++ +int (*cmp)(node_type \*a, node_type \*b) ++ +and returning a value less than, equal to, or greater than zero +according to the result of comparison. + +void foo_insert(struct trp_root *treap, node_type \*node):: + + Insert node into treap. If inserted multiple times, + a node will appear in the treap multiple times. + +void foo_remove(struct trp_root *treap, node_type \*node):: + + Remove node from treap. Caller must ensure node is + present in treap before using this function. 
+ +node_type *foo_search(struct trp_root \*treap, node_type \*key):: + + Search for a node that matches key. If no match is found, + result is NULL. + +node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: + + Like `foo_search`, but if the key is missing return what + would be key's successor, were key in treap (NULL if no + successor). + +node_type *foo_first(struct trp_root \*treap):: + + Find the first item from the treap, in sorted order. + +node_type *foo_next(struct trp_root \*treap, node_type \*node):: + + Find the next item. diff --git a/wt-status.c b/wt-status.c index 2f9e33c8fa..54b6b03b9c 100644 --- a/wt-status.c +++ b/wt-status.c @@ -313,8 +313,10 @@ static void wt_status_collect_changes_worktree(struct wt_status *s) DIFF_OPT_SET(&rev.diffopt, DIRTY_SUBMODULES); if (!s->show_untracked_files) DIFF_OPT_SET(&rev.diffopt, IGNORE_UNTRACKED_IN_SUBMODULES); - if (s->ignore_submodule_arg) + if (s->ignore_submodule_arg) { + DIFF_OPT_SET(&rev.diffopt, OVERRIDE_SUBMODULE_CONFIG); handle_ignore_submodules_arg(&rev.diffopt, s->ignore_submodule_arg); + } rev.diffopt.format_callback = wt_status_collect_changed_cb; rev.diffopt.format_callback_data = s; rev.prune_data = s->pathspec; @@ -331,8 +333,10 @@ static void wt_status_collect_changes_index(struct wt_status *s) opt.def = s->is_initial ? EMPTY_TREE_SHA1_HEX : s->reference; setup_revisions(0, NULL, &rev, &opt); - if (s->ignore_submodule_arg) + if (s->ignore_submodule_arg) { + DIFF_OPT_SET(&rev.diffopt, OVERRIDE_SUBMODULE_CONFIG); handle_ignore_submodules_arg(&rev.diffopt, s->ignore_submodule_arg); + } rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK; rev.diffopt.format_callback = wt_status_collect_updated_cb; |