diff options
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/completion/git-completion.bash | 2 | ||||
-rw-r--r-- | contrib/completion/git-prompt.sh | 332 | ||||
-rw-r--r-- | contrib/mw-to-git/.perlcriticrc | 28 | ||||
-rw-r--r-- | contrib/mw-to-git/Makefile | 2 | ||||
-rwxr-xr-x | contrib/mw-to-git/git-remote-mediawiki.perl | 537 | ||||
-rwxr-xr-x | contrib/mw-to-git/t/test-gitmw-lib.sh | 19 | ||||
-rw-r--r-- | contrib/mw-to-git/t/test.config | 4 |
7 files changed, 491 insertions, 433 deletions
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 6c3bafeea5..ebc40d4845 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -33,8 +33,6 @@ esac # returns location of .git repo __gitdir () { - # Note: this function is duplicated in git-prompt.sh - # When updating it, make sure you update the other one to match. if [ -z "${1-}" ]; then if [ -n "${__git_dir-}" ]; then echo "$__git_dir" diff --git a/contrib/completion/git-prompt.sh b/contrib/completion/git-prompt.sh index 86a4f3fa49..a81ef5a482 100644 --- a/contrib/completion/git-prompt.sh +++ b/contrib/completion/git-prompt.sh @@ -3,7 +3,7 @@ # Copyright (C) 2006,2007 Shawn O. Pearce <spearce@spearce.org> # Distributed under the GNU General Public License, version 2.0. # -# This script allows you to see the current branch in your prompt. +# This script allows you to see repository status in your prompt. # # To enable: # @@ -13,24 +13,27 @@ # 3a) Change your PS1 to call __git_ps1 as # command-substitution: # Bash: PS1='[\u@\h \W$(__git_ps1 " (%s)")]\$ ' -# ZSH: PS1='[%n@%m %c$(__git_ps1 " (%s)")]\$ ' +# ZSH: setopt PROMPT_SUBST ; PS1='[%n@%m %c$(__git_ps1 " (%s)")]\$ ' # the optional argument will be used as format string. -# 3b) Alternatively, if you are using bash, __git_ps1 can be -# used for PROMPT_COMMAND with two parameters, <pre> and -# <post>, which are strings you would put in $PS1 before -# and after the status string generated by the git-prompt -# machinery. e.g. +# 3b) Alternatively, for a slightly faster prompt, __git_ps1 can +# be used for PROMPT_COMMAND in Bash or for precmd() in Zsh +# with two parameters, <pre> and <post>, which are strings +# you would put in $PS1 before and after the status string +# generated by the git-prompt machinery. e.g. # Bash: PROMPT_COMMAND='__git_ps1 "\u@\h:\w" "\\\$ "' +# will show username, at-sign, host, colon, cwd, then +# various status string, followed by dollar and SP, as +# your prompt. # ZSH: precmd () { __git_ps1 "%n" ":%~$ " "|%s" } -# will show username, at-sign, host, colon, cwd, then -# various status string, followed by dollar and SP, as -# your prompt. +# will show username, pipe, then various status string, +# followed by colon, cwd, dollar and SP, as your prompt. # Optionally, you can supply a third argument with a printf # format string to finetune the output of the branch status # -# The argument to __git_ps1 will be displayed only if you are currently -# in a git repository. The %s token will be the name of the current -# branch. +# The repository status will be displayed only if you are currently in a +# git repository. The %s token is the placeholder for the shown status. +# +# The prompt status always includes the current branch name. # # In addition, if you set GIT_PS1_SHOWDIRTYSTATE to a nonempty value, # unstaged (*) and staged (+) changes will be shown next to the branch @@ -78,31 +81,8 @@ # # If you would like a colored hint about the current dirty state, set # GIT_PS1_SHOWCOLORHINTS to a nonempty value. The colors are based on -# the colored output of "git status -sb". - -# __gitdir accepts 0 or 1 arguments (i.e., location) -# returns location of .git repo -__gitdir () -{ - # Note: this function is duplicated in git-completion.bash - # When updating it, make sure you update the other one to match. - if [ -z "${1-}" ]; then - if [ -n "${__git_dir-}" ]; then - echo "$__git_dir" - elif [ -n "${GIT_DIR-}" ]; then - test -d "${GIT_DIR-}" || return 1 - echo "$GIT_DIR" - elif [ -d .git ]; then - echo .git - else - git rev-parse --git-dir 2>/dev/null - fi - elif [ -d "$1/.git" ]; then - echo "$1/.git" - else - echo "$1" - fi -} +# the colored output of "git status -sb" and are available only when +# using __git_ps1 for PROMPT_COMMAND or precmd. # stores the divergence from upstream in $p # used by GIT_PS1_SHOWUPSTREAM @@ -225,8 +205,8 @@ __git_ps1_show_upstream () } # Helper function that is meant to be called from __git_ps1. It -# builds up a gitstring injecting color codes into the appropriate -# places. +# injects color codes into the appropriate gitstring variables used +# to build a gitstring. __git_ps1_colorize_gitstring () { if [[ -n ${ZSH_VERSION-} ]]; then @@ -234,74 +214,40 @@ __git_ps1_colorize_gitstring () local c_green='%F{green}' local c_lblue='%F{blue}' local c_clear='%f' - local bad_color=$c_red - local ok_color=$c_green - local branch_color="$c_clear" - local flags_color="$c_lblue" - local branchstring="$c${b##refs/heads/}" - - if [ $detached = no ]; then - branch_color="$ok_color" - else - branch_color="$bad_color" - fi - - gitstring="$branch_color$branchstring$c_clear" - - if [ -n "$w$i$s$u$r$p" ]; then - gitstring="$gitstring$z" - fi - if [ "$w" = "*" ]; then - gitstring="$gitstring$bad_color$w" - fi - if [ -n "$i" ]; then - gitstring="$gitstring$ok_color$i" - fi - if [ -n "$s" ]; then - gitstring="$gitstring$flags_color$s" - fi - if [ -n "$u" ]; then - gitstring="$gitstring$bad_color$u" - fi - gitstring="$gitstring$c_clear$r$p" - return + else + # Using \[ and \] around colors is necessary to prevent + # issues with command line editing/browsing/completion! + local c_red='\[\e[31m\]' + local c_green='\[\e[32m\]' + local c_lblue='\[\e[1;34m\]' + local c_clear='\[\e[0m\]' fi - local c_red='\e[31m' - local c_green='\e[32m' - local c_lblue='\e[1;34m' - local c_clear='\e[0m' local bad_color=$c_red local ok_color=$c_green - local branch_color="$c_clear" local flags_color="$c_lblue" - local branchstring="$c${b##refs/heads/}" + local branch_color="" if [ $detached = no ]; then branch_color="$ok_color" else branch_color="$bad_color" fi + c="$branch_color$c" - # Setting gitstring directly with \[ and \] around colors - # is necessary to prevent wrapping issues! - gitstring="\[$branch_color\]$branchstring\[$c_clear\]" - - if [ -n "$w$i$s$u$r$p" ]; then - gitstring="$gitstring$z" - fi + z="$c_clear$z" if [ "$w" = "*" ]; then - gitstring="$gitstring\[$bad_color\]$w" + w="$bad_color$w" fi if [ -n "$i" ]; then - gitstring="$gitstring\[$ok_color\]$i" + i="$ok_color$i" fi if [ -n "$s" ]; then - gitstring="$gitstring\[$flags_color\]$s" + s="$flags_color$s" fi if [ -n "$u" ]; then - gitstring="$gitstring\[$bad_color\]$u" + u="$bad_color$u" fi - gitstring="$gitstring\[$c_clear\]$r$p" + r="$c_clear$r" } # __git_ps1 accepts 0 or 1 arguments (i.e., format string) @@ -335,50 +281,83 @@ __git_ps1 () ;; esac - local g="$(__gitdir)" - if [ -z "$g" ]; then + local repo_info rev_parse_exit_code + repo_info="$(git rev-parse --git-dir --is-inside-git-dir \ + --is-bare-repository --is-inside-work-tree \ + --short HEAD 2>/dev/null)" + rev_parse_exit_code="$?" + + if [ -z "$repo_info" ]; then if [ $pcmode = yes ]; then #In PC mode PS1 always needs to be set PS1="$ps1pc_start$ps1pc_end" fi + return + fi + + local short_sha + if [ "$rev_parse_exit_code" = "0" ]; then + short_sha="${repo_info##*$'\n'}" + repo_info="${repo_info%$'\n'*}" + fi + local inside_worktree="${repo_info##*$'\n'}" + repo_info="${repo_info%$'\n'*}" + local bare_repo="${repo_info##*$'\n'}" + repo_info="${repo_info%$'\n'*}" + local inside_gitdir="${repo_info##*$'\n'}" + local g="${repo_info%$'\n'*}" + + local r="" + local b="" + local step="" + local total="" + if [ -d "$g/rebase-merge" ]; then + read b 2>/dev/null <"$g/rebase-merge/head-name" + read step 2>/dev/null <"$g/rebase-merge/msgnum" + read total 2>/dev/null <"$g/rebase-merge/end" + if [ -f "$g/rebase-merge/interactive" ]; then + r="|REBASE-i" + else + r="|REBASE-m" + fi else - local r="" - local b="" - local step="" - local total="" - if [ -d "$g/rebase-merge" ]; then - b="$(cat "$g/rebase-merge/head-name")" - step=$(cat "$g/rebase-merge/msgnum") - total=$(cat "$g/rebase-merge/end") - if [ -f "$g/rebase-merge/interactive" ]; then - r="|REBASE-i" + if [ -d "$g/rebase-apply" ]; then + read step 2>/dev/null <"$g/rebase-apply/next" + read total 2>/dev/null <"$g/rebase-apply/last" + if [ -f "$g/rebase-apply/rebasing" ]; then + read b 2>/dev/null <"$g/rebase-apply/head-name" + r="|REBASE" + elif [ -f "$g/rebase-apply/applying" ]; then + r="|AM" else - r="|REBASE-m" + r="|AM/REBASE" fi + elif [ -f "$g/MERGE_HEAD" ]; then + r="|MERGING" + elif [ -f "$g/CHERRY_PICK_HEAD" ]; then + r="|CHERRY-PICKING" + elif [ -f "$g/REVERT_HEAD" ]; then + r="|REVERTING" + elif [ -f "$g/BISECT_LOG" ]; then + r="|BISECTING" + fi + + if [ -n "$b" ]; then + : + elif [ -h "$g/HEAD" ]; then + # symlink symbolic ref + b="$(git symbolic-ref HEAD 2>/dev/null)" else - if [ -d "$g/rebase-apply" ]; then - step=$(cat "$g/rebase-apply/next") - total=$(cat "$g/rebase-apply/last") - if [ -f "$g/rebase-apply/rebasing" ]; then - b="$(cat "$g/rebase-apply/head-name")" - r="|REBASE" - elif [ -f "$g/rebase-apply/applying" ]; then - r="|AM" - else - r="|AM/REBASE" + local head="" + if ! read head 2>/dev/null <"$g/HEAD"; then + if [ $pcmode = yes ]; then + PS1="$ps1pc_start$ps1pc_end" fi - elif [ -f "$g/MERGE_HEAD" ]; then - r="|MERGING" - elif [ -f "$g/CHERRY_PICK_HEAD" ]; then - r="|CHERRY-PICKING" - elif [ -f "$g/REVERT_HEAD" ]; then - r="|REVERTING" - elif [ -f "$g/BISECT_LOG" ]; then - r="|BISECTING" + return fi - - test -n "$b" || - b="$(git symbolic-ref HEAD 2>/dev/null)" || { + # is it a symbolic ref? + b="${head#ref: }" + if [ "$head" = "$b" ]; then detached=yes b="$( case "${GIT_PS1_DESCRIBE_STYLE-}" in @@ -392,70 +371,75 @@ __git_ps1 () git describe --tags --exact-match HEAD ;; esac 2>/dev/null)" || - b="$(cut -c1-7 "$g/HEAD" 2>/dev/null)..." || - b="unknown" + b="$short_sha..." b="($b)" - } + fi fi + fi - if [ -n "$step" ] && [ -n "$total" ]; then - r="$r $step/$total" - fi + if [ -n "$step" ] && [ -n "$total" ]; then + r="$r $step/$total" + fi - local w="" - local i="" - local s="" - local u="" - local c="" - local p="" + local w="" + local i="" + local s="" + local u="" + local c="" + local p="" - if [ "true" = "$(git rev-parse --is-inside-git-dir 2>/dev/null)" ]; then - if [ "true" = "$(git rev-parse --is-bare-repository 2>/dev/null)" ]; then - c="BARE:" + if [ "true" = "$inside_gitdir" ]; then + if [ "true" = "$bare_repo" ]; then + c="BARE:" + else + b="GIT_DIR!" + fi + elif [ "true" = "$inside_worktree" ]; then + if [ -n "${GIT_PS1_SHOWDIRTYSTATE-}" ] && + [ "$(git config --bool bash.showDirtyState)" != "false" ] + then + git diff --no-ext-diff --quiet --exit-code || w="*" + if [ -n "$short_sha" ]; then + git diff-index --cached --quiet HEAD -- || i="+" else - b="GIT_DIR!" - fi - elif [ "true" = "$(git rev-parse --is-inside-work-tree 2>/dev/null)" ]; then - if [ -n "${GIT_PS1_SHOWDIRTYSTATE-}" ] && - [ "$(git config --bool bash.showDirtyState)" != "false" ] - then - git diff --no-ext-diff --quiet --exit-code || w="*" - if git rev-parse --quiet --verify HEAD >/dev/null; then - git diff-index --cached --quiet HEAD -- || i="+" - else - i="#" - fi - fi - if [ -n "${GIT_PS1_SHOWSTASHSTATE-}" ]; then - git rev-parse --verify refs/stash >/dev/null 2>&1 && s="$" + i="#" fi + fi + if [ -n "${GIT_PS1_SHOWSTASHSTATE-}" ] && + [ -r "$g/refs/stash" ]; then + s="$" + fi - if [ -n "${GIT_PS1_SHOWUNTRACKEDFILES-}" ] && - [ "$(git config --bool bash.showUntrackedFiles)" != "false" ] && - [ -n "$(git ls-files --others --exclude-standard)" ] - then - u="%${ZSH_VERSION+%}" - fi + if [ -n "${GIT_PS1_SHOWUNTRACKEDFILES-}" ] && + [ "$(git config --bool bash.showUntrackedFiles)" != "false" ] && + git ls-files --others --exclude-standard --error-unmatch -- '*' >/dev/null 2>/dev/null + then + u="%${ZSH_VERSION+%}" + fi - if [ -n "${GIT_PS1_SHOWUPSTREAM-}" ]; then - __git_ps1_show_upstream - fi + if [ -n "${GIT_PS1_SHOWUPSTREAM-}" ]; then + __git_ps1_show_upstream fi + fi - local z="${GIT_PS1_STATESEPARATOR-" "}" - local f="$w$i$s$u" - if [ $pcmode = yes ]; then - local gitstring= - if [ -n "${GIT_PS1_SHOWCOLORHINTS-}" ]; then - __git_ps1_colorize_gitstring - else - gitstring="$c${b##refs/heads/}${f:+$z$f}$r$p" - fi + local z="${GIT_PS1_STATESEPARATOR-" "}" + + # NO color option unless in PROMPT_COMMAND mode + if [ $pcmode = yes ] && [ -n "${GIT_PS1_SHOWCOLORHINTS-}" ]; then + __git_ps1_colorize_gitstring + fi + + local f="$w$i$s$u" + local gitstring="$c${b##refs/heads/}${f:+$z$f}$r$p" + + if [ $pcmode = yes ]; then + if [[ -n ${ZSH_VERSION-} ]]; then gitstring=$(printf -- "$printf_format" "$gitstring") - PS1="$ps1pc_start$gitstring$ps1pc_end" else - # NO color option unless in PROMPT_COMMAND mode - printf -- "$printf_format" "$c${b##refs/heads/}${f:+$z$f}$r$p" + printf -v gitstring -- "$printf_format" "$gitstring" fi + PS1="$ps1pc_start$gitstring$ps1pc_end" + else + printf -- "$printf_format" "$gitstring" fi } diff --git a/contrib/mw-to-git/.perlcriticrc b/contrib/mw-to-git/.perlcriticrc new file mode 100644 index 0000000000..5a9955d757 --- /dev/null +++ b/contrib/mw-to-git/.perlcriticrc @@ -0,0 +1,28 @@ +# These 3 rules demand to add the s, m and x flag to *every* regexp. This is +# overkill and would be harmful for readability. +[-RegularExpressions::RequireExtendedFormatting] +[-RegularExpressions::RequireDotMatchAnything] +[-RegularExpressions::RequireLineBoundaryMatching] + +# This rule says that builtin functions should not be called with parentheses +# e.g.: (taken from CPAN's documentation) +# open($handle, '>', $filename); #not ok +# open $handle, '>', $filename; #ok +# Applying such a rule would mean modifying a huge number of lines for a +# question of style. +[-CodeLayout::ProhibitParensWithBuiltins] + +# This rule states that each system call should have its return value checked +# The problem is that it includes the print call. Checking every print call's +# return value would be harmful to the code readabilty. +# This configuration keeps all default function but print. +[InputOutput::RequireCheckedSyscalls] +functions = open say close + +# This rules demands to add a dependancy for the Readonly module. This is not +# wished. +[-ValuesAndExpressions::ProhibitConstantPragma] + +# This rule is not really useful (rather a question of style) and produces many +# warnings among the code. +[-ValuesAndExpressions::ProhibitNoisyQuotes] diff --git a/contrib/mw-to-git/Makefile b/contrib/mw-to-git/Makefile index f14971987c..1fb2424481 100644 --- a/contrib/mw-to-git/Makefile +++ b/contrib/mw-to-git/Makefile @@ -15,3 +15,5 @@ all: build build install clean: $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL=$(SCRIPT_PERL_FULL) \ $@-perl-script +perlcritic: + perlcritic -2 *.perl diff --git a/contrib/mw-to-git/git-remote-mediawiki.perl b/contrib/mw-to-git/git-remote-mediawiki.perl index c1a967b3d1..71baf8ace8 100755 --- a/contrib/mw-to-git/git-remote-mediawiki.perl +++ b/contrib/mw-to-git/git-remote-mediawiki.perl @@ -15,18 +15,16 @@ use strict; use MediaWiki::API; use Git; use DateTime::Format::ISO8601; +use warnings; # By default, use UTF-8 to communicate with Git and the user -binmode STDERR, ":utf8"; -binmode STDOUT, ":utf8"; +binmode STDERR, ':encoding(UTF-8)'; +binmode STDOUT, ':encoding(UTF-8)'; use URI::Escape; -use IPC::Open2; - -use warnings; # Mediawiki filenames can contain forward slashes. This variable decides by which pattern they should be replaced -use constant SLASH_REPLACEMENT => "%2F"; +use constant SLASH_REPLACEMENT => '%2F'; # It's not always possible to delete pages (may require some # privileges). Deleted pages are replaced with this content. @@ -37,11 +35,23 @@ use constant DELETED_CONTENT => "[[Category:Deleted]]\n"; use constant EMPTY_CONTENT => "<!-- empty page -->\n"; # used to reflect file creation or deletion in diff. -use constant NULL_SHA1 => "0000000000000000000000000000000000000000"; +use constant NULL_SHA1 => '0000000000000000000000000000000000000000'; # Used on Git's side to reflect empty edit messages on the wiki use constant EMPTY_MESSAGE => '*Empty MediaWiki Message*'; +use constant EMPTY => q{}; + +# Number of pages taken into account at once in submodule get_mw_page_list +use constant SLICE_SIZE => 50; + +# Number of linked mediafile to get at once in get_linked_mediafiles +# The query is split in small batches because of the MW API limit of +# the number of links to be returned (500 links max). +use constant BATCH_SIZE => 10; + +use constant HTTP_CODE_OK => 200; + if (@ARGV != 2) { exit_error_usage(); } @@ -51,35 +61,35 @@ my $url = $ARGV[1]; # Accept both space-separated and multiple keys in config file. # Spaces should be written as _ anyway because we'll use chomp. -my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote.". $remotename .".pages")); +my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote.${remotename}.pages")); chomp(@tracked_pages); # Just like @tracked_pages, but for MediaWiki categories. -my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote.". $remotename .".categories")); +my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote.${remotename}.categories")); chomp(@tracked_categories); # Import media files on pull -my $import_media = run_git("config --get --bool remote.". $remotename .".mediaimport"); +my $import_media = run_git("config --get --bool remote.${remotename}.mediaimport"); chomp($import_media); -$import_media = ($import_media eq "true"); +$import_media = ($import_media eq 'true'); # Export media files on push -my $export_media = run_git("config --get --bool remote.". $remotename .".mediaexport"); +my $export_media = run_git("config --get --bool remote.${remotename}.mediaexport"); chomp($export_media); -$export_media = !($export_media eq "false"); +$export_media = !($export_media eq 'false'); -my $wiki_login = run_git("config --get remote.". $remotename .".mwLogin"); +my $wiki_login = run_git("config --get remote.${remotename}.mwLogin"); # Note: mwPassword is discourraged. Use the credential system instead. -my $wiki_passwd = run_git("config --get remote.". $remotename .".mwPassword"); -my $wiki_domain = run_git("config --get remote.". $remotename .".mwDomain"); +my $wiki_passwd = run_git("config --get remote.${remotename}.mwPassword"); +my $wiki_domain = run_git("config --get remote.${remotename}.mwDomain"); chomp($wiki_login); chomp($wiki_passwd); chomp($wiki_domain); # Import only last revisions (both for clone and fetch) -my $shallow_import = run_git("config --get --bool remote.". $remotename .".shallow"); +my $shallow_import = run_git("config --get --bool remote.${remotename}.shallow"); chomp($shallow_import); -$shallow_import = ($shallow_import eq "true"); +$shallow_import = ($shallow_import eq 'true'); # Fetch (clone and pull) by revisions instead of by pages. This behavior # is more efficient when we have a wiki with lots of pages and we fetch @@ -87,15 +97,18 @@ $shallow_import = ($shallow_import eq "true"); # Possible values: # - by_rev: perform one query per new revision on the remote wiki # - by_page: query each tracked page for new revision -my $fetch_strategy = run_git("config --get remote.$remotename.fetchStrategy"); -unless ($fetch_strategy) { - $fetch_strategy = run_git("config --get mediawiki.fetchStrategy"); +my $fetch_strategy = run_git("config --get remote.${remotename}.fetchStrategy"); +if (!$fetch_strategy) { + $fetch_strategy = run_git('config --get mediawiki.fetchStrategy'); } chomp($fetch_strategy); -unless ($fetch_strategy) { - $fetch_strategy = "by_page"; +if (!$fetch_strategy) { + $fetch_strategy = 'by_page'; } +# Remember the timestamp corresponding to a revision id. +my %basetimestamps; + # Dumb push: don't update notes and mediawiki ref to reflect the last push. # # Configurable with mediawiki.dumbPush, or per-remote with @@ -110,48 +123,25 @@ unless ($fetch_strategy) { # will get the history with information lost). If the import is # deterministic, this means everybody gets the same sha1 for each # MediaWiki revision. -my $dumb_push = run_git("config --get --bool remote.$remotename.dumbPush"); -unless ($dumb_push) { - $dumb_push = run_git("config --get --bool mediawiki.dumbPush"); +my $dumb_push = run_git("config --get --bool remote.${remotename}.dumbPush"); +if (!$dumb_push) { + $dumb_push = run_git('config --get --bool mediawiki.dumbPush'); } chomp($dumb_push); -$dumb_push = ($dumb_push eq "true"); +$dumb_push = ($dumb_push eq 'true'); my $wiki_name = $url; -$wiki_name =~ s/[^\/]*:\/\///; +$wiki_name =~ s{[^/]*://}{}; # If URL is like http://user:password@example.com/, we clearly don't # want the password in $wiki_name. While we're there, also remove user # and '@' sign, to avoid author like MWUser@HTTPUser@host.com $wiki_name =~ s/^.*@//; # Commands parser -my $entry; -my @cmd; while (<STDIN>) { chomp; - @cmd = split(/ /); - if (defined($cmd[0])) { - # Line not blank - if ($cmd[0] eq "capabilities") { - die("Too many arguments for capabilities") unless (!defined($cmd[1])); - mw_capabilities(); - } elsif ($cmd[0] eq "list") { - die("Too many arguments for list") unless (!defined($cmd[2])); - mw_list($cmd[1]); - } elsif ($cmd[0] eq "import") { - die("Invalid arguments for import") unless ($cmd[1] ne "" && !defined($cmd[2])); - mw_import($cmd[1]); - } elsif ($cmd[0] eq "option") { - die("Too many arguments for option") unless ($cmd[1] ne "" && $cmd[2] ne "" && !defined($cmd[3])); - mw_option($cmd[1],$cmd[2]); - } elsif ($cmd[0] eq "push") { - mw_push($cmd[1]); - } else { - print STDERR "Unknown command. Aborting...\n"; - last; - } - } else { - # blank line: we should terminate + + if (!parse_command($_)) { last; } @@ -172,6 +162,40 @@ sub exit_error_usage { "Then, use git commit, push and pull as with every normal git repository.\n"; } +sub parse_command { + my ($line) = @_; + my @cmd = split(/ /, $line); + if (!defined $cmd[0]) { + return 0; + } + if ($cmd[0] eq 'capabilities') { + die("Too many arguments for capabilities\n") + if (defined($cmd[1])); + mw_capabilities(); + } elsif ($cmd[0] eq 'list') { + die("Too many arguments for list\n") if (defined($cmd[2])); + mw_list($cmd[1]); + } elsif ($cmd[0] eq 'import') { + die("Invalid argument for import\n") + if ($cmd[1] eq EMPTY); + die("Too many arguments for import\n") + if (defined($cmd[2])); + mw_import($cmd[1]); + } elsif ($cmd[0] eq 'option') { + die("Invalid arguments for option\n") + if ($cmd[1] eq EMPTY || $cmd[2] eq EMPTY); + die("Too many arguments for option\n") + if (defined($cmd[3])); + mw_option($cmd[1],$cmd[2]); + } elsif ($cmd[0] eq 'push') { + mw_push($cmd[1]); + } else { + print {*STDERR} "Unknown command. Aborting...\n"; + return 0; + } + return 1; +} + # MediaWiki API instance, created lazily. my $mediawiki; @@ -180,7 +204,7 @@ sub mw_connect_maybe { return; } $mediawiki = MediaWiki::API->new; - $mediawiki->{config}->{api_url} = "$url/api.php"; + $mediawiki->{config}->{api_url} = "${url}/api.php"; if ($wiki_login) { my %credential = ( 'url' => $url, @@ -193,16 +217,17 @@ sub mw_connect_maybe { lgdomain => $wiki_domain}; if ($mediawiki->login($request)) { Git::credential(\%credential, 'approve'); - print STDERR "Logged in mediawiki user \"$credential{username}\".\n"; + print {*STDERR} qq(Logged in mediawiki user "$credential{username}".\n); } else { - print STDERR "Failed to log in mediawiki user \"$credential{username}\" on $url\n"; - print STDERR " (error " . + print {*STDERR} qq(Failed to log in mediawiki user "$credential{username}" on ${url}\n); + print {*STDERR} ' (error ' . $mediawiki->{error}->{code} . ': ' . $mediawiki->{error}->{details} . ")\n"; Git::credential(\%credential, 'reject'); exit 1; } } + return; } sub fatal_mw_error { @@ -225,21 +250,23 @@ sub fatal_mw_error { sub get_mw_tracked_pages { my $pages = shift; get_mw_page_list(\@tracked_pages, $pages); + return; } sub get_mw_page_list { my $page_list = shift; my $pages = shift; - my @some_pages = @$page_list; + my @some_pages = @{$page_list}; while (@some_pages) { - my $last = 50; - if ($#some_pages < $last) { - $last = $#some_pages; + my $last_page = SLICE_SIZE; + if ($#some_pages < $last_page) { + $last_page = $#some_pages; } - my @slice = @some_pages[0..$last]; + my @slice = @some_pages[0..$last_page]; get_mw_first_pages(\@slice, $pages); - @some_pages = @some_pages[51..$#some_pages]; + @some_pages = @some_pages[(SLICE_SIZE + 1)..$#some_pages]; } + return; } sub get_mw_tracked_categories { @@ -249,7 +276,7 @@ sub get_mw_tracked_categories { # Mediawiki requires the Category # prefix, but let's not force the user # to specify it. - $category = "Category:" . $category; + $category = "Category:${category}"; } my $mw_pages = $mediawiki->list( { action => 'query', @@ -257,11 +284,12 @@ sub get_mw_tracked_categories { cmtitle => $category, cmlimit => 'max' } ) || die $mediawiki->{error}->{code} . ': ' - . $mediawiki->{error}->{details}; + . $mediawiki->{error}->{details} . "\n"; foreach my $page (@{$mw_pages}) { $pages->{$page->{title}} = $page; } } + return; } sub get_mw_all_pages { @@ -278,6 +306,7 @@ sub get_mw_all_pages { foreach my $page (@{$mw_pages}) { $pages->{$page->{title}} = $page; } + return; } # queries the wiki for a set of pages. Meant to be used within a loop @@ -300,18 +329,19 @@ sub get_mw_first_pages { } while (my ($id, $page) = each(%{$mw_pages->{query}->{pages}})) { if ($id < 0) { - print STDERR "Warning: page $page->{title} not found on wiki\n"; + print {*STDERR} "Warning: page $page->{title} not found on wiki\n"; } else { $pages->{$page->{title}} = $page; } } + return; } # Get the list of pages to be fetched according to configuration. sub get_mw_pages { mw_connect_maybe(); - print STDERR "Listing pages on remote wiki...\n"; + print {*STDERR} "Listing pages on remote wiki...\n"; my %pages; # hash on page titles to avoid duplicates my $user_defined; @@ -329,14 +359,14 @@ sub get_mw_pages { get_mw_all_pages(\%pages); } if ($import_media) { - print STDERR "Getting media files for selected pages...\n"; + print {*STDERR} "Getting media files for selected pages...\n"; if ($user_defined) { get_linked_mediafiles(\%pages); } else { get_all_mediafiles(\%pages); } } - print STDERR (scalar keys %pages) . " pages found.\n"; + print {*STDERR} (scalar keys %pages) . " pages found.\n"; return %pages; } @@ -344,9 +374,13 @@ sub get_mw_pages { # $out = run_git("command args", "raw"); # don't interpret output as UTF-8. sub run_git { my $args = shift; - my $encoding = (shift || "encoding(UTF-8)"); - open(my $git, "-|:$encoding", "git " . $args); - my $res = do { local $/; <$git> }; + my $encoding = (shift || 'encoding(UTF-8)'); + open(my $git, "-|:${encoding}", "git ${args}") + or die "Unable to fork: $!\n"; + my $res = do { + local $/ = undef; + <$git> + }; close($git); return $res; @@ -361,27 +395,26 @@ sub get_all_mediafiles { my $mw_pages = $mediawiki->list({ action => 'query', list => 'allpages', - apnamespace => get_mw_namespace_id("File"), + apnamespace => get_mw_namespace_id('File'), aplimit => 'max' }); if (!defined($mw_pages)) { - print STDERR "fatal: could not get the list of pages for media files.\n"; - print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; - print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; + print {*STDERR} "fatal: could not get the list of pages for media files.\n"; + print {*STDERR} "fatal: '$url' does not appear to be a mediawiki\n"; + print {*STDERR} "fatal: make sure '$url/api.php' is a valid page.\n"; exit 1; } foreach my $page (@{$mw_pages}) { $pages->{$page->{title}} = $page; } + return; } sub get_linked_mediafiles { my $pages = shift; - my @titles = map $_->{title}, values(%{$pages}); + my @titles = map { $_->{title} } values(%{$pages}); - # The query is split in small batches because of the MW API limit of - # the number of links to be returned (500 links max). - my $batch = 10; + my $batch = BATCH_SIZE; while (@titles) { if ($#titles < $batch) { $batch = $#titles; @@ -397,7 +430,7 @@ sub get_linked_mediafiles { action => 'query', prop => 'links|images', titles => $mw_titles, - plnamespace => get_mw_namespace_id("File"), + plnamespace => get_mw_namespace_id('File'), pllimit => 'max' }; my $result = $mediawiki->api($query); @@ -405,11 +438,13 @@ sub get_linked_mediafiles { while (my ($id, $page) = each(%{$result->{query}->{pages}})) { my @media_titles; if (defined($page->{links})) { - my @link_titles = map $_->{title}, @{$page->{links}}; + my @link_titles + = map { $_->{title} } @{$page->{links}}; push(@media_titles, @link_titles); } if (defined($page->{images})) { - my @image_titles = map $_->{title}, @{$page->{images}}; + my @image_titles + = map { $_->{title} } @{$page->{images}}; push(@media_titles, @image_titles); } if (@media_titles) { @@ -419,6 +454,7 @@ sub get_linked_mediafiles { @titles = @titles[($batch+1)..$#titles]; } + return; } sub get_mw_mediafile_for_page_revision { @@ -432,7 +468,7 @@ sub get_mw_mediafile_for_page_revision { my $query = { action => 'query', prop => 'imageinfo', - titles => "File:" . $filename, + titles => "File:${filename}", iistart => $timestamp, iiend => $timestamp, iiprop => 'timestamp|archivename|url', @@ -450,47 +486,44 @@ sub get_mw_mediafile_for_page_revision { $mediafile{timestamp} = $fileinfo->{timestamp}; # Mediawiki::API's download function doesn't support https URLs # and can't download old versions of files. - print STDERR "\tDownloading file $mediafile{title}, version $mediafile{timestamp}\n"; + print {*STDERR} "\tDownloading file $mediafile{title}, version $mediafile{timestamp}\n"; $mediafile{content} = download_mw_mediafile($fileinfo->{url}); } return %mediafile; } sub download_mw_mediafile { - my $url = shift; + my $download_url = shift; - my $response = $mediawiki->{ua}->get($url); - if ($response->code == 200) { + my $response = $mediawiki->{ua}->get($download_url); + if ($response->code == HTTP_CODE_OK) { return $response->decoded_content; } else { - print STDERR "Error downloading mediafile from :\n"; - print STDERR "URL: $url\n"; - print STDERR "Server response: " . $response->code . " " . $response->message . "\n"; + print {*STDERR} "Error downloading mediafile from :\n"; + print {*STDERR} "URL: ${download_url}\n"; + print {*STDERR} 'Server response: ' . $response->code . q{ } . $response->message . "\n"; exit 1; } } sub get_last_local_revision { # Get note regarding last mediawiki revision - my $note = run_git("notes --ref=$remotename/mediawiki show refs/mediawiki/$remotename/master 2>/dev/null"); + my $note = run_git("notes --ref=${remotename}/mediawiki show refs/mediawiki/${remotename}/master 2>/dev/null"); my @note_info = split(/ /, $note); my $lastrevision_number; - if (!(defined($note_info[0]) && $note_info[0] eq "mediawiki_revision:")) { - print STDERR "No previous mediawiki revision found"; + if (!(defined($note_info[0]) && $note_info[0] eq 'mediawiki_revision:')) { + print {*STDERR} 'No previous mediawiki revision found'; $lastrevision_number = 0; } else { # Notes are formatted : mediawiki_revision: #number $lastrevision_number = $note_info[1]; chomp($lastrevision_number); - print STDERR "Last local mediawiki revision found is $lastrevision_number"; + print {*STDERR} "Last local mediawiki revision found is ${lastrevision_number}"; } return $lastrevision_number; } -# Remember the timestamp corresponding to a revision id. -my %basetimestamps; - # Get the last remote revision without taking in account which pages are # tracked or not. This function makes a single request to the wiki thus # avoid a loop onto all tracked pages. This is useful for the fetch-by-rev @@ -519,7 +552,7 @@ sub get_last_remote_revision { my $max_rev_num = 0; - print STDERR "Getting last revision id on tracked pages...\n"; + print {*STDERR} "Getting last revision id on tracked pages...\n"; foreach my $page (@pages) { my $id = $page->{pageid}; @@ -540,7 +573,7 @@ sub get_last_remote_revision { $max_rev_num = ($lastrev->{revid} > $max_rev_num ? $lastrev->{revid} : $max_rev_num); } - print STDERR "Last remote revision found is $max_rev_num.\n"; + print {*STDERR} "Last remote revision found is $max_rev_num.\n"; return $max_rev_num; } @@ -551,7 +584,7 @@ sub mediawiki_clean { # Mediawiki does not allow blank space at the end of a page and ends with a single \n. # This function right trims a string and adds a \n at the end to follow this rule $string =~ s/\s+$//; - if ($string eq "" && $page_created) { + if ($string eq EMPTY && $page_created) { # Creating empty pages is forbidden. $string = EMPTY_CONTENT; } @@ -562,15 +595,15 @@ sub mediawiki_clean { sub mediawiki_smudge { my $string = shift; if ($string eq EMPTY_CONTENT) { - $string = ""; + $string = EMPTY; } # This \n is important. This is due to mediawiki's way to handle end of files. - return $string."\n"; + return "${string}\n"; } sub mediawiki_clean_filename { my $filename = shift; - $filename =~ s/@{[SLASH_REPLACEMENT]}/\//g; + $filename =~ s{@{[SLASH_REPLACEMENT]}}{/}g; # [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded. # Do a variant of URL-encoding, i.e. looks like URL-encoding, # but with _ added to prevent MediaWiki from thinking this is @@ -584,16 +617,17 @@ sub mediawiki_clean_filename { sub mediawiki_smudge_filename { my $filename = shift; - $filename =~ s/\//@{[SLASH_REPLACEMENT]}/g; + $filename =~ s{/}{@{[SLASH_REPLACEMENT]}}g; $filename =~ s/ /_/g; # Decode forbidden characters encoded in mediawiki_clean_filename - $filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf("%c", hex($1))/ge; + $filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf('%c', hex($1))/ge; return $filename; } sub literal_data { my ($content) = @_; - print STDOUT "data ", bytes::length($content), "\n", $content; + print {*STDOUT} 'data ', bytes::length($content), "\n", $content; + return; } sub literal_data_raw { @@ -601,33 +635,37 @@ sub literal_data_raw { my ($content) = @_; # Avoid confusion between size in bytes and in characters utf8::downgrade($content); - binmode STDOUT, ":raw"; - print STDOUT "data ", bytes::length($content), "\n", $content; - binmode STDOUT, ":utf8"; + binmode {*STDOUT}, ':raw'; + print {*STDOUT} 'data ', bytes::length($content), "\n", $content; + binmode {*STDOUT}, ':encoding(UTF-8)'; + return; } sub mw_capabilities { # Revisions are imported to the private namespace # refs/mediawiki/$remotename/ by the helper and fetched into # refs/remotes/$remotename later by fetch. - print STDOUT "refspec refs/heads/*:refs/mediawiki/$remotename/*\n"; - print STDOUT "import\n"; - print STDOUT "list\n"; - print STDOUT "push\n"; - print STDOUT "\n"; + print {*STDOUT} "refspec refs/heads/*:refs/mediawiki/${remotename}/*\n"; + print {*STDOUT} "import\n"; + print {*STDOUT} "list\n"; + print {*STDOUT} "push\n"; + print {*STDOUT} "\n"; + return; } sub mw_list { # MediaWiki do not have branches, we consider one branch arbitrarily # called master, and HEAD pointing to it. - print STDOUT "? refs/heads/master\n"; - print STDOUT "\@refs/heads/master HEAD\n"; - print STDOUT "\n"; + print {*STDOUT} "? refs/heads/master\n"; + print {*STDOUT} "\@refs/heads/master HEAD\n"; + print {*STDOUT} "\n"; + return; } sub mw_option { - print STDERR "remote-helper command 'option $_[0]' not yet implemented\n"; - print STDOUT "unsupported\n"; + print {*STDERR} "remote-helper command 'option $_[0]' not yet implemented\n"; + print {*STDOUT} "unsupported\n"; + return; } sub fetch_mw_revisions_for_page { @@ -658,15 +696,15 @@ sub fetch_mw_revisions_for_page { push(@page_revs, $page_rev_ids); $revnum++; } - last unless $result->{'query-continue'}; + last if (!$result->{'query-continue'}); $query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid}; } if ($shallow_import && @page_revs) { - print STDERR " Found 1 revision (shallow import).\n"; + print {*STDERR} " Found 1 revision (shallow import).\n"; @page_revs = sort {$b->{revid} <=> $a->{revid}} (@page_revs); return $page_revs[0]; } - print STDERR " Found ", $revnum, " revision(s).\n"; + print {*STDERR} " Found ${revnum} revision(s).\n"; return @page_revs; } @@ -678,8 +716,7 @@ sub fetch_mw_revisions { my $n = 1; foreach my $page (@pages) { my $id = $page->{pageid}; - - print STDERR "page $n/", scalar(@pages), ": ". $page->{title} ."\n"; + print {*STDERR} "page ${n}/", scalar(@pages), ': ', $page->{title}, "\n"; $n++; my @page_revs = fetch_mw_revisions_for_page($page, $id, $fetch_from); @revisions = (@page_revs, @revisions); @@ -693,7 +730,7 @@ sub fe_escape_path { $path =~ s/\\/\\\\/g; $path =~ s/"/\\"/g; $path =~ s/\n/\\n/g; - return '"' . $path . '"'; + return qq("${path}"); } sub import_file_revision { @@ -713,42 +750,43 @@ sub import_file_revision { my $author = $commit{author}; my $date = $commit{date}; - print STDOUT "commit refs/mediawiki/$remotename/master\n"; - print STDOUT "mark :$n\n"; - print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n"; + print {*STDOUT} "commit refs/mediawiki/${remotename}/master\n"; + print {*STDOUT} "mark :${n}\n"; + print {*STDOUT} "committer ${author} <${author}\@${wiki_name}> " . $date->epoch . " +0000\n"; literal_data($comment); # If it's not a clone, we need to know where to start from if (!$full_import && $n == 1) { - print STDOUT "from refs/mediawiki/$remotename/master^0\n"; + print {*STDOUT} "from refs/mediawiki/${remotename}/master^0\n"; } if ($content ne DELETED_CONTENT) { - print STDOUT "M 644 inline " . - fe_escape_path($title . ".mw") . "\n"; + print {*STDOUT} 'M 644 inline ' . + fe_escape_path("${title}.mw") . "\n"; literal_data($content); if (%mediafile) { - print STDOUT "M 644 inline " + print {*STDOUT} 'M 644 inline ' . fe_escape_path($mediafile{title}) . "\n"; literal_data_raw($mediafile{content}); } - print STDOUT "\n\n"; + print {*STDOUT} "\n\n"; } else { - print STDOUT "D " . fe_escape_path($title . ".mw") . "\n"; + print {*STDOUT} 'D ' . fe_escape_path("${title}.mw") . "\n"; } # mediawiki revision number in the git note if ($full_import && $n == 1) { - print STDOUT "reset refs/notes/$remotename/mediawiki\n"; + print {*STDOUT} "reset refs/notes/${remotename}/mediawiki\n"; } - print STDOUT "commit refs/notes/$remotename/mediawiki\n"; - print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n"; - literal_data("Note added by git-mediawiki during import"); + print {*STDOUT} "commit refs/notes/${remotename}/mediawiki\n"; + print {*STDOUT} "committer ${author} <${author}\@${wiki_name}> " . $date->epoch . " +0000\n"; + literal_data('Note added by git-mediawiki during import'); if (!$full_import && $n == 1) { - print STDOUT "from refs/notes/$remotename/mediawiki^0\n"; + print {*STDOUT} "from refs/notes/${remotename}/mediawiki^0\n"; } - print STDOUT "N inline :$n\n"; - literal_data("mediawiki_revision: " . $commit{mw_revision}); - print STDOUT "\n\n"; + print {*STDOUT} "N inline :${n}\n"; + literal_data("mediawiki_revision: $commit{mw_revision}"); + print {*STDOUT} "\n\n"; + return; } # parse a sequence of @@ -761,23 +799,25 @@ sub get_more_refs { my @refs; while (1) { my $line = <STDIN>; - if ($line =~ m/^$cmd (.*)$/) { + if ($line =~ /^$cmd (.*)$/) { push(@refs, $1); } elsif ($line eq "\n") { return @refs; } else { - die("Invalid command in a '$cmd' batch: ". $_); + die("Invalid command in a '$cmd' batch: $_\n"); } } + return; } sub mw_import { # multiple import commands can follow each other. - my @refs = (shift, get_more_refs("import")); + my @refs = (shift, get_more_refs('import')); foreach my $ref (@refs) { mw_import_ref($ref); } - print STDOUT "done\n"; + print {*STDOUT} "done\n"; + return; } sub mw_import_ref { @@ -787,40 +827,41 @@ sub mw_import_ref { # Since HEAD is a symbolic ref to master (by convention, # followed by the output of the command "list" that we gave), # we don't need to do anything in this case. - if ($ref eq "HEAD") { + if ($ref eq 'HEAD') { return; } mw_connect_maybe(); - print STDERR "Searching revisions...\n"; + print {*STDERR} "Searching revisions...\n"; my $last_local = get_last_local_revision(); my $fetch_from = $last_local + 1; if ($fetch_from == 1) { - print STDERR ", fetching from beginning.\n"; + print {*STDERR} ", fetching from beginning.\n"; } else { - print STDERR ", fetching from here.\n"; + print {*STDERR} ", fetching from here.\n"; } my $n = 0; - if ($fetch_strategy eq "by_rev") { - print STDERR "Fetching & writing export data by revs...\n"; + if ($fetch_strategy eq 'by_rev') { + print {*STDERR} "Fetching & writing export data by revs...\n"; $n = mw_import_ref_by_revs($fetch_from); - } elsif ($fetch_strategy eq "by_page") { - print STDERR "Fetching & writing export data by pages...\n"; + } elsif ($fetch_strategy eq 'by_page') { + print {*STDERR} "Fetching & writing export data by pages...\n"; $n = mw_import_ref_by_pages($fetch_from); } else { - print STDERR "fatal: invalid fetch strategy \"$fetch_strategy\".\n"; - print STDERR "Check your configuration variables remote.$remotename.fetchStrategy and mediawiki.fetchStrategy\n"; + print {*STDERR} qq(fatal: invalid fetch strategy "${fetch_strategy}".\n); + print {*STDERR} "Check your configuration variables remote.${remotename}.fetchStrategy and mediawiki.fetchStrategy\n"; exit 1; } if ($fetch_from == 1 && $n == 0) { - print STDERR "You appear to have cloned an empty MediaWiki.\n"; + print {*STDERR} "You appear to have cloned an empty MediaWiki.\n"; # Something has to be done remote-helper side. If nothing is done, an error is # thrown saying that HEAD is referring to unknown object 0000000000000000000 # and the clone fails. } + return; } sub mw_import_ref_by_pages { @@ -832,7 +873,7 @@ sub mw_import_ref_by_pages { my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from); @revisions = sort {$a->{revid} <=> $b->{revid}} @revisions; - my @revision_ids = map $_->{revid}, @revisions; + my @revision_ids = map { $_->{revid} } @revisions; return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash); } @@ -859,7 +900,7 @@ sub mw_import_revids { my $n_actual = 0; my $last_timestamp = 0; # Placeholer in case $rev->timestamp is undefined - foreach my $pagerevid (@$revision_ids) { + foreach my $pagerevid (@{$revision_ids}) { # Count page even if we skip it, since we display # $n/$total and $total includes skipped pages. $n++; @@ -875,7 +916,7 @@ sub mw_import_revids { my $result = $mediawiki->api($query); if (!$result) { - die "Failed to retrieve modified page for revision $pagerevid"; + die "Failed to retrieve modified page for revision $pagerevid\n"; } if (defined($result->{query}->{badrevids}->{$pagerevid})) { @@ -884,7 +925,7 @@ sub mw_import_revids { } if (!defined($result->{query}->{pages})) { - die "Invalid revision $pagerevid."; + die "Invalid revision ${pagerevid}.\n"; } my @result_pages = values(%{$result->{query}->{pages}}); @@ -894,8 +935,8 @@ sub mw_import_revids { my $page_title = $result_page->{title}; if (!exists($pages->{$page_title})) { - print STDERR "$n/", scalar(@$revision_ids), - ": Skipping revision #$rev->{revid} of $page_title\n"; + print {*STDERR} "${n}/", scalar(@{$revision_ids}), + ": Skipping revision #$rev->{revid} of ${page_title}\n"; next; } @@ -920,14 +961,14 @@ sub mw_import_revids { my %mediafile; if ($namespace) { my $id = get_mw_namespace_id($namespace); - if ($id && $id == get_mw_namespace_id("File")) { + if ($id && $id == get_mw_namespace_id('File')) { %mediafile = get_mw_mediafile_for_page_revision($filename, $rev->{timestamp}); } } # If this is a revision of the media page for new version # of a file do one common commit for both file and media page. # Else do commit only for that page. - print STDERR "$n/", scalar(@$revision_ids), ": Revision #$rev->{revid} of $commit{title}\n"; + print {*STDERR} "${n}/", scalar(@{$revision_ids}), ": Revision #$rev->{revid} of $commit{title}\n"; import_file_revision(\%commit, ($fetch_from == 1), $n_actual, \%mediafile); } @@ -935,17 +976,17 @@ sub mw_import_revids { } sub error_non_fast_forward { - my $advice = run_git("config --bool advice.pushNonFastForward"); + my $advice = run_git('config --bool advice.pushNonFastForward'); chomp($advice); - if ($advice ne "false") { + if ($advice ne 'false') { # Native git-push would show this after the summary. # We can't ask it to display it cleanly, so print it # ourselves before. - print STDERR "To prevent you from losing history, non-fast-forward updates were rejected\n"; - print STDERR "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n"; - print STDERR "'Note about fast-forwards' section of 'git push --help' for details.\n"; + print {*STDERR} "To prevent you from losing history, non-fast-forward updates were rejected\n"; + print {*STDERR} "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n"; + print {*STDERR} "'Note about fast-forwards' section of 'git push --help' for details.\n"; } - print STDOUT "error $_[0] \"non-fast-forward\"\n"; + print {*STDOUT} qq(error $_[0] "non-fast-forward"\n); return 0; } @@ -956,11 +997,11 @@ sub mw_upload_file { my $file_deleted = shift; my $summary = shift; my $newrevid; - my $path = "File:" . $complete_file_name; + my $path = "File:${complete_file_name}"; my %hashFiles = get_allowed_file_extensions(); if (!exists($hashFiles{$extension})) { - print STDERR "$complete_file_name is not a permitted file on this wiki.\n"; - print STDERR "Check the configuration of file uploads in your mediawiki.\n"; + print {*STDERR} "${complete_file_name} is not a permitted file on this wiki.\n"; + print {*STDERR} "Check the configuration of file uploads in your mediawiki.\n"; return $newrevid; } # Deleting and uploading a file requires a priviledged user @@ -972,18 +1013,18 @@ sub mw_upload_file { reason => $summary }; if (!$mediawiki->edit($query)) { - print STDERR "Failed to delete file on remote wiki\n"; - print STDERR "Check your permissions on the remote site. Error code:\n"; - print STDERR $mediawiki->{error}->{code} . ':' . $mediawiki->{error}->{details}; + print {*STDERR} "Failed to delete file on remote wiki\n"; + print {*STDERR} "Check your permissions on the remote site. Error code:\n"; + print {*STDERR} $mediawiki->{error}->{code} . ':' . $mediawiki->{error}->{details}; exit 1; } } else { # Don't let perl try to interpret file content as UTF-8 => use "raw" - my $content = run_git("cat-file blob $new_sha1", "raw"); - if ($content ne "") { + my $content = run_git("cat-file blob ${new_sha1}", 'raw'); + if ($content ne EMPTY) { mw_connect_maybe(); $mediawiki->{config}->{upload_url} = - "$url/index.php/Special:Upload"; + "${url}/index.php/Special:Upload"; $mediawiki->edit({ action => 'upload', filename => $complete_file_name, @@ -995,12 +1036,12 @@ sub mw_upload_file { }, { skip_encoding => 1 } ) || die $mediawiki->{error}->{code} . ':' - . $mediawiki->{error}->{details}; + . $mediawiki->{error}->{details} . "\n"; my $last_file_page = $mediawiki->get_page({title => $path}); $newrevid = $last_file_page->{revid}; - print STDERR "Pushed file: $new_sha1 - $complete_file_name.\n"; + print {*STDERR} "Pushed file: ${new_sha1} - ${complete_file_name}.\n"; } else { - print STDERR "Empty file $complete_file_name not pushed.\n"; + print {*STDERR} "Empty file ${complete_file_name} not pushed.\n"; } } return $newrevid; @@ -1022,7 +1063,7 @@ sub mw_push_file { my $newrevid; if ($summary eq EMPTY_MESSAGE) { - $summary = ''; + $summary = EMPTY; } my $new_sha1 = $diff_info_split[3]; @@ -1033,13 +1074,13 @@ sub mw_push_file { my ($title, $extension) = $complete_file_name =~ /^(.*)\.([^\.]*)$/; if (!defined($extension)) { - $extension = ""; + $extension = EMPTY; } - if ($extension eq "mw") { + if ($extension eq 'mw') { my $ns = get_mw_namespace_id_for_page($complete_file_name); - if ($ns && $ns == get_mw_namespace_id("File") && (!$export_media)) { - print STDERR "Ignoring media file related page: $complete_file_name\n"; - return ($oldrevid, "ok"); + if ($ns && $ns == get_mw_namespace_id('File') && (!$export_media)) { + print {*STDERR} "Ignoring media file related page: ${complete_file_name}\n"; + return ($oldrevid, 'ok'); } my $file_content; if ($page_deleted) { @@ -1049,7 +1090,7 @@ sub mw_push_file { # with this content instead: $file_content = DELETED_CONTENT; } else { - $file_content = run_git("cat-file blob $new_sha1"); + $file_content = run_git("cat-file blob ${new_sha1}"); } mw_connect_maybe(); @@ -1066,49 +1107,49 @@ sub mw_push_file { if (!$result) { if ($mediawiki->{error}->{code} == 3) { # edit conflicts, considered as non-fast-forward - print STDERR 'Warning: Error ' . + print {*STDERR} 'Warning: Error ' . $mediawiki->{error}->{code} . - ' from mediwiki: ' . $mediawiki->{error}->{details} . + ' from mediawiki: ' . $mediawiki->{error}->{details} . ".\n"; - return ($oldrevid, "non-fast-forward"); + return ($oldrevid, 'non-fast-forward'); } else { # Other errors. Shouldn't happen => just die() die 'Fatal: Error ' . $mediawiki->{error}->{code} . - ' from mediwiki: ' . $mediawiki->{error}->{details}; + ' from mediawiki: ' . $mediawiki->{error}->{details} . "\n"; } } $newrevid = $result->{edit}->{newrevid}; - print STDERR "Pushed file: $new_sha1 - $title\n"; + print {*STDERR} "Pushed file: ${new_sha1} - ${title}\n"; } elsif ($export_media) { $newrevid = mw_upload_file($complete_file_name, $new_sha1, $extension, $page_deleted, $summary); } else { - print STDERR "Ignoring media file $title\n"; + print {*STDERR} "Ignoring media file ${title}\n"; } $newrevid = ($newrevid or $oldrevid); - return ($newrevid, "ok"); + return ($newrevid, 'ok'); } sub mw_push { # multiple push statements can follow each other - my @refsspecs = (shift, get_more_refs("push")); + my @refsspecs = (shift, get_more_refs('push')); my $pushed; for my $refspec (@refsspecs) { my ($force, $local, $remote) = $refspec =~ /^(\+)?([^:]*):([^:]*)$/ - or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>"); + or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>\n"); if ($force) { - print STDERR "Warning: forced push not allowed on a MediaWiki.\n"; + print {*STDERR} "Warning: forced push not allowed on a MediaWiki.\n"; } - if ($local eq "") { - print STDERR "Cannot delete remote branch on a MediaWiki\n"; - print STDOUT "error $remote cannot delete\n"; + if ($local eq EMPTY) { + print {*STDERR} "Cannot delete remote branch on a MediaWiki\n"; + print {*STDOUT} "error ${remote} cannot delete\n"; next; } - if ($remote ne "refs/heads/master") { - print STDERR "Only push to the branch 'master' is supported on a MediaWiki\n"; - print STDOUT "error $remote only master allowed\n"; + if ($remote ne 'refs/heads/master') { + print {*STDERR} "Only push to the branch 'master' is supported on a MediaWiki\n"; + print {*STDOUT} "error ${remote} only master allowed\n"; next; } if (mw_push_revision($local, $remote)) { @@ -1117,30 +1158,32 @@ sub mw_push { } # Notify Git that the push is done - print STDOUT "\n"; + print {*STDOUT} "\n"; if ($pushed && $dumb_push) { - print STDERR "Just pushed some revisions to MediaWiki.\n"; - print STDERR "The pushed revisions now have to be re-imported, and your current branch\n"; - print STDERR "needs to be updated with these re-imported commits. You can do this with\n"; - print STDERR "\n"; - print STDERR " git pull --rebase\n"; - print STDERR "\n"; + print {*STDERR} "Just pushed some revisions to MediaWiki.\n"; + print {*STDERR} "The pushed revisions now have to be re-imported, and your current branch\n"; + print {*STDERR} "needs to be updated with these re-imported commits. You can do this with\n"; + print {*STDERR} "\n"; + print {*STDERR} " git pull --rebase\n"; + print {*STDERR} "\n"; } + return; } sub mw_push_revision { my $local = shift; my $remote = shift; # actually, this has to be "refs/heads/master" at this point. my $last_local_revid = get_last_local_revision(); - print STDERR ".\n"; # Finish sentence started by get_last_local_revision() + print {*STDERR} ".\n"; # Finish sentence started by get_last_local_revision() my $last_remote_revid = get_last_remote_revision(); my $mw_revision = $last_remote_revid; # Get sha1 of commit pointed by local HEAD - my $HEAD_sha1 = run_git("rev-parse $local 2>/dev/null"); chomp($HEAD_sha1); + my $HEAD_sha1 = run_git("rev-parse ${local} 2>/dev/null"); + chomp($HEAD_sha1); # Get sha1 of commit pointed by remotes/$remotename/master - my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/$remotename/master 2>/dev/null"); + my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/${remotename}/master 2>/dev/null"); chomp($remoteorigin_sha1); if ($last_local_revid > 0 && @@ -1159,22 +1202,22 @@ sub mw_push_revision { if ($last_local_revid > 0) { my $parsed_sha1 = $remoteorigin_sha1; # Find a path from last MediaWiki commit to pushed commit - print STDERR "Computing path from local to remote ...\n"; - my @local_ancestry = split(/\n/, run_git("rev-list --boundary --parents $local ^$parsed_sha1")); + print {*STDERR} "Computing path from local to remote ...\n"; + my @local_ancestry = split(/\n/, run_git("rev-list --boundary --parents ${local} ^${parsed_sha1}")); my %local_ancestry; foreach my $line (@local_ancestry) { - if (my ($child, $parents) = $line =~ m/^-?([a-f0-9]+) ([a-f0-9 ]+)/) { - foreach my $parent (split(' ', $parents)) { + if (my ($child, $parents) = $line =~ /^-?([a-f0-9]+) ([a-f0-9 ]+)/) { + foreach my $parent (split(/ /, $parents)) { $local_ancestry{$parent} = $child; } - } elsif (!$line =~ m/^([a-f0-9]+)/) { - die "Unexpected output from git rev-list: $line"; + } elsif (!$line =~ /^([a-f0-9]+)/) { + die "Unexpected output from git rev-list: ${line}\n"; } } while ($parsed_sha1 ne $HEAD_sha1) { my $child = $local_ancestry{$parsed_sha1}; if (!$child) { - printf STDERR "Cannot find a path in history from remote commit to last commit\n"; + print {*STDERR} "Cannot find a path in history from remote commit to last commit\n"; return error_non_fast_forward($remote); } push(@commit_pairs, [$parsed_sha1, $child]); @@ -1183,12 +1226,12 @@ sub mw_push_revision { } else { # No remote mediawiki revision. Export the whole # history (linearized with --first-parent) - print STDERR "Warning: no common ancestor, pushing complete history\n"; - my $history = run_git("rev-list --first-parent --children $local"); - my @history = split('\n', $history); + print {*STDERR} "Warning: no common ancestor, pushing complete history\n"; + my $history = run_git("rev-list --first-parent --children ${local}"); + my @history = split(/\n/, $history); @history = @history[1..$#history]; foreach my $line (reverse @history) { - my @commit_info_split = split(/ |\n/, $line); + my @commit_info_split = split(/[ \n]/, $line); push(@commit_pairs, \@commit_info_split); } } @@ -1196,12 +1239,12 @@ sub mw_push_revision { foreach my $commit_info_split (@commit_pairs) { my $sha1_child = @{$commit_info_split}[0]; my $sha1_commit = @{$commit_info_split}[1]; - my $diff_infos = run_git("diff-tree -r --raw -z $sha1_child $sha1_commit"); + my $diff_infos = run_git("diff-tree -r --raw -z ${sha1_child} ${sha1_commit}"); # TODO: we could detect rename, and encode them with a #redirect on the wiki. # TODO: for now, it's just a delete+add my @diff_info_list = split(/\0/, $diff_infos); # Keep the subject line of the commit message as mediawiki comment for the revision - my $commit_msg = run_git("log --no-walk --format=\"%s\" $sha1_commit"); + my $commit_msg = run_git(qq(log --no-walk --format="%s" ${sha1_commit})); chomp($commit_msg); # Push every blob while (@diff_info_list) { @@ -1213,7 +1256,7 @@ sub mw_push_revision { my $info = shift(@diff_info_list); my $file = shift(@diff_info_list); ($mw_revision, $status) = mw_push_file($info, $file, $commit_msg, $mw_revision); - if ($status eq "non-fast-forward") { + if ($status eq 'non-fast-forward') { # we may already have sent part of the # commit to MediaWiki, but it's too # late to cancel it. Stop the push in @@ -1221,17 +1264,17 @@ sub mw_push_revision { # accurate error message. return error_non_fast_forward($remote); } - if ($status ne "ok") { - die("Unknown error from mw_push_file()"); + if ($status ne 'ok') { + die("Unknown error from mw_push_file()\n"); } } - unless ($dumb_push) { - run_git("notes --ref=$remotename/mediawiki add -f -m \"mediawiki_revision: $mw_revision\" $sha1_commit"); - run_git("update-ref -m \"Git-MediaWiki push\" refs/mediawiki/$remotename/master $sha1_commit $sha1_child"); + if (!$dumb_push) { + run_git(qq(notes --ref=${remotename}/mediawiki add -f -m "mediawiki_revision: ${mw_revision}" ${sha1_commit})); + run_git(qq(update-ref -m "Git-MediaWiki push" refs/mediawiki/${remotename}/master ${sha1_commit} ${sha1_child})); } } - print STDOUT "ok $remote\n"; + print {*STDOUT} "ok ${remote}\n"; return 1; } @@ -1244,8 +1287,8 @@ sub get_allowed_file_extensions { siprop => 'fileextensions' }; my $result = $mediawiki->api($query); - my @file_extensions= map $_->{ext},@{$result->{query}->{fileextensions}}; - my %hashFile = map {$_ => 1}@file_extensions; + my @file_extensions = map { $_->{ext}} @{$result->{query}->{fileextensions}}; + my %hashFile = map { $_ => 1 } @file_extensions; return %hashFile; } @@ -1267,8 +1310,8 @@ sub get_mw_namespace_id { # Look at configuration file, if the record for that namespace is # already cached. Namespaces are stored in form: # "Name_of_namespace:Id_namespace", ex.: "File:6". - my @temp = split(/[\n]/, run_git("config --get-all remote." - . $remotename .".namespaceCache")); + my @temp = split(/\n/, + run_git("config --get-all remote.${remotename}.namespaceCache")); chomp(@temp); foreach my $ns (@temp) { my ($n, $id) = split(/:/, $ns); @@ -1282,7 +1325,7 @@ sub get_mw_namespace_id { } if (!exists $namespace_id{$name}) { - print STDERR "Namespace $name not found in cache, querying the wiki ...\n"; + print {*STDERR} "Namespace ${name} not found in cache, querying the wiki ...\n"; # NS not found => get namespace id from MW and store it in # configuration file. my $query = { @@ -1306,8 +1349,8 @@ sub get_mw_namespace_id { my $ns = $namespace_id{$name}; my $id; - unless (defined $ns) { - print STDERR "No such namespace $name on MediaWiki.\n"; + if (!defined $ns) { + print {*STDERR} "No such namespace ${name} on MediaWiki.\n"; $ns = {is_namespace => 0}; $namespace_id{$name} = $ns; } @@ -1321,15 +1364,15 @@ sub get_mw_namespace_id { # Store explicitely requested namespaces on disk if (!exists $cached_mw_namespace_id{$name}) { - run_git("config --add remote.". $remotename - .".namespaceCache \"". $name .":". $store_id ."\""); + run_git(qq(config --add remote.${remotename}.namespaceCache "${name}:${store_id}")); $cached_mw_namespace_id{$name} = 1; } return $id; } sub get_mw_namespace_id_for_page { - if (my ($namespace) = $_[0] =~ /^([^:]*):/) { + my $namespace = shift; + if ($namespace =~ /^([^:]*):/) { return get_mw_namespace_id($namespace); } else { return; diff --git a/contrib/mw-to-git/t/test-gitmw-lib.sh b/contrib/mw-to-git/t/test-gitmw-lib.sh index 3b2cfacf51..bb76cee379 100755 --- a/contrib/mw-to-git/t/test-gitmw-lib.sh +++ b/contrib/mw-to-git/t/test-gitmw-lib.sh @@ -336,20 +336,21 @@ wiki_install () { fi # Fetch MediaWiki's archive if not already present in the TMP directory + MW_FILENAME="mediawiki-$MW_VERSION_MAJOR.$MW_VERSION_MINOR.tar.gz" cd "$TMP" - if [ ! -f "$MW_VERSION.tar.gz" ] ; then - echo "Downloading $MW_VERSION sources ..." - wget "http://download.wikimedia.org/mediawiki/1.19/mediawiki-1.19.0.tar.gz" || + if [ ! -f $MW_FILENAME ] ; then + echo "Downloading $MW_VERSION_MAJOR.$MW_VERSION_MINOR sources ..." + wget "http://download.wikimedia.org/mediawiki/$MW_VERSION_MAJOR/$MW_FILENAME" || error "Unable to download "\ - "http://download.wikimedia.org/mediawiki/1.19/"\ - "mediawiki-1.19.0.tar.gz. "\ + "http://download.wikimedia.org/mediawiki/$MW_VERSION_MAJOR/"\ + "$MW_FILENAME. "\ "Please fix your connection and launch the script again." - echo "$MW_VERSION.tar.gz downloaded in `pwd`. "\ + echo "$MW_FILENAME downloaded in `pwd`. "\ "You can delete it later if you want." else - echo "Reusing existing $MW_VERSION.tar.gz downloaded in `pwd`." + echo "Reusing existing $MW_FILENAME downloaded in `pwd`." fi - archive_abs_path=$(pwd)/"$MW_VERSION.tar.gz" + archive_abs_path=$(pwd)/$MW_FILENAME cd "$WIKI_DIR_INST/$WIKI_DIR_NAME/" || error "can't cd to $WIKI_DIR_INST/$WIKI_DIR_NAME/" tar xzf "$archive_abs_path" --strip-components=1 || @@ -431,5 +432,5 @@ wiki_delete () { # Delete the wiki's SQLite database rm -f "$TMP/$DB_FILE" || error "Database $TMP/$DB_FILE could not be deleted." rm -f "$FILES_FOLDER/$DB_FILE" - rm -rf "$TMP/$MW_VERSION" + rm -rf "$TMP/mediawiki-$MW_VERSION_MAJOR.$MW_VERSION_MINOR.tar.gz" } diff --git a/contrib/mw-to-git/t/test.config b/contrib/mw-to-git/t/test.config index 958b37b4a7..4cfebe9c69 100644 --- a/contrib/mw-to-git/t/test.config +++ b/contrib/mw-to-git/t/test.config @@ -30,6 +30,8 @@ WEB_WWW=$WEB/www # The variables below are used by the script to install a wiki. # You should not modify these unless you are modifying the script itself. -MW_VERSION=mediawiki-1.19.0 +# tested versions: 1.19.X -> 1.21.1 +MW_VERSION_MAJOR=1.21 +MW_VERSION_MINOR=1 FILES_FOLDER=install-wiki DB_INSTALL_SCRIPT=db_install.php |