diff options
Diffstat (limited to 'contrib/mw-to-git')
-rw-r--r-- | contrib/mw-to-git/.gitignore | 2 | ||||
-rw-r--r-- | contrib/mw-to-git/.perlcriticrc | 28 | ||||
-rw-r--r-- | contrib/mw-to-git/Git/Mediawiki.pm | 100 | ||||
-rw-r--r-- | contrib/mw-to-git/Makefile | 80 | ||||
-rwxr-xr-x | contrib/mw-to-git/bin-wrapper/git | 14 | ||||
-rwxr-xr-x | contrib/mw-to-git/git-mw.perl | 368 | ||||
-rwxr-xr-x | contrib/mw-to-git/git-remote-mediawiki.perl (renamed from contrib/mw-to-git/git-remote-mediawiki) | 704 | ||||
-rw-r--r-- | contrib/mw-to-git/git-remote-mediawiki.txt | 2 | ||||
-rw-r--r-- | contrib/mw-to-git/t/README | 6 | ||||
-rwxr-xr-x | contrib/mw-to-git/t/install-wiki.sh | 12 | ||||
-rw-r--r-- | contrib/mw-to-git/t/install-wiki/LocalSettings.php | 2 | ||||
-rwxr-xr-x | contrib/mw-to-git/t/t9360-mw-to-git-clone.sh | 14 | ||||
-rwxr-xr-x | contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh | 44 | ||||
-rwxr-xr-x | contrib/mw-to-git/t/t9363-mw-to-git-export-import.sh | 19 | ||||
-rwxr-xr-x | contrib/mw-to-git/t/t9365-continuing-queries.sh | 23 | ||||
-rwxr-xr-x | contrib/mw-to-git/t/test-gitmw-lib.sh | 33 | ||||
-rw-r--r-- | contrib/mw-to-git/t/test.config | 6 |
17 files changed, 1022 insertions, 435 deletions
diff --git a/contrib/mw-to-git/.gitignore b/contrib/mw-to-git/.gitignore new file mode 100644 index 0000000000..ae545b013d --- /dev/null +++ b/contrib/mw-to-git/.gitignore @@ -0,0 +1,2 @@ +git-remote-mediawiki +git-mw diff --git a/contrib/mw-to-git/.perlcriticrc b/contrib/mw-to-git/.perlcriticrc new file mode 100644 index 0000000000..5a9955d757 --- /dev/null +++ b/contrib/mw-to-git/.perlcriticrc @@ -0,0 +1,28 @@ +# These 3 rules demand to add the s, m and x flag to *every* regexp. This is +# overkill and would be harmful for readability. +[-RegularExpressions::RequireExtendedFormatting] +[-RegularExpressions::RequireDotMatchAnything] +[-RegularExpressions::RequireLineBoundaryMatching] + +# This rule says that builtin functions should not be called with parentheses +# e.g.: (taken from CPAN's documentation) +# open($handle, '>', $filename); #not ok +# open $handle, '>', $filename; #ok +# Applying such a rule would mean modifying a huge number of lines for a +# question of style. +[-CodeLayout::ProhibitParensWithBuiltins] + +# This rule states that each system call should have its return value checked +# The problem is that it includes the print call. Checking every print call's +# return value would be harmful to the code readabilty. +# This configuration keeps all default function but print. +[InputOutput::RequireCheckedSyscalls] +functions = open say close + +# This rules demands to add a dependancy for the Readonly module. This is not +# wished. +[-ValuesAndExpressions::ProhibitConstantPragma] + +# This rule is not really useful (rather a question of style) and produces many +# warnings among the code. +[-ValuesAndExpressions::ProhibitNoisyQuotes] diff --git a/contrib/mw-to-git/Git/Mediawiki.pm b/contrib/mw-to-git/Git/Mediawiki.pm new file mode 100644 index 0000000000..d13c4dfa7d --- /dev/null +++ b/contrib/mw-to-git/Git/Mediawiki.pm @@ -0,0 +1,100 @@ +package Git::Mediawiki; + +use 5.008; +use strict; +use Git; + +BEGIN { + +our ($VERSION, @ISA, @EXPORT, @EXPORT_OK); + +# Totally unstable API. +$VERSION = '0.01'; + +require Exporter; + +@ISA = qw(Exporter); + +@EXPORT = (); + +# Methods which can be called as standalone functions as well: +@EXPORT_OK = qw(clean_filename smudge_filename connect_maybe + EMPTY HTTP_CODE_OK HTTP_CODE_PAGE_NOT_FOUND); +} + +# Mediawiki filenames can contain forward slashes. This variable decides by which pattern they should be replaced +use constant SLASH_REPLACEMENT => '%2F'; + +# Used to test for empty strings +use constant EMPTY => q{}; + +# HTTP codes +use constant HTTP_CODE_OK => 200; +use constant HTTP_CODE_PAGE_NOT_FOUND => 404; + +sub clean_filename { + my $filename = shift; + $filename =~ s{@{[SLASH_REPLACEMENT]}}{/}g; + # [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded. + # Do a variant of URL-encoding, i.e. looks like URL-encoding, + # but with _ added to prevent MediaWiki from thinking this is + # an actual special character. + $filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge; + # If we use the uri escape before + # we should unescape here, before anything + + return $filename; +} + +sub smudge_filename { + my $filename = shift; + $filename =~ s{/}{@{[SLASH_REPLACEMENT]}}g; + $filename =~ s/ /_/g; + # Decode forbidden characters encoded in clean_filename + $filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf('%c', hex($1))/ge; + return $filename; +} + +sub connect_maybe { + my $wiki = shift; + if ($wiki) { + return $wiki; + } + + my $remote_name = shift; + my $remote_url = shift; + my ($wiki_login, $wiki_password, $wiki_domain); + + $wiki_login = Git::config("remote.${remote_name}.mwLogin"); + $wiki_password = Git::config("remote.${remote_name}.mwPassword"); + $wiki_domain = Git::config("remote.${remote_name}.mwDomain"); + + $wiki = MediaWiki::API->new; + $wiki->{config}->{api_url} = "${remote_url}/api.php"; + if ($wiki_login) { + my %credential = ( + 'url' => $remote_url, + 'username' => $wiki_login, + 'password' => $wiki_password + ); + Git::credential(\%credential); + my $request = {lgname => $credential{username}, + lgpassword => $credential{password}, + lgdomain => $wiki_domain}; + if ($wiki->login($request)) { + Git::credential(\%credential, 'approve'); + print {*STDERR} qq(Logged in mediawiki user "$credential{username}".\n); + } else { + print {*STDERR} qq(Failed to log in mediawiki user "$credential{username}" on ${remote_url}\n); + print {*STDERR} ' (error ' . + $wiki->{error}->{code} . ': ' . + $wiki->{error}->{details} . ")\n"; + Git::credential(\%credential, 'reject'); + exit 1; + } + } + + return $wiki; +} + +1; # Famous last words diff --git a/contrib/mw-to-git/Makefile b/contrib/mw-to-git/Makefile index 3ed728b0ef..a4b6f7a2cd 100644 --- a/contrib/mw-to-git/Makefile +++ b/contrib/mw-to-git/Makefile @@ -1,47 +1,57 @@ # -# Copyright (C) 2012 -# Charles Roussel <charles.roussel@ensimag.imag.fr> -# Simon Cathebras <simon.cathebras@ensimag.imag.fr> -# Julien Khayat <julien.khayat@ensimag.imag.fr> -# Guillaume Sasdy <guillaume.sasdy@ensimag.imag.fr> -# Simon Perrat <simon.perrat@ensimag.imag.fr> +# Copyright (C) 2013 +# Matthieu Moy <Matthieu.Moy@imag.fr> # -## Build git-remote-mediawiki +# To build and test: +# +# make +# bin-wrapper/git mw preview Some_page.mw +# bin-wrapper/git clone mediawiki::http://example.com/wiki/ +# +# To install, run Git's toplevel 'make install' then run: +# +# make install --include ../../config.mak.autogen --include ../../config.mak +GIT_MEDIAWIKI_PM=Git/Mediawiki.pm +SCRIPT_PERL=git-remote-mediawiki.perl +SCRIPT_PERL+=git-mw.perl +GIT_ROOT_DIR=../.. +HERE=contrib/mw-to-git/ -ifndef PERL_PATH - PERL_PATH = /usr/bin/perl -endif -ifndef gitexecdir - gitexecdir = $(shell git --exec-path) -endif +INSTALL = install -PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) -gitexecdir_SQ = $(subst ','\'',$(gitexecdir)) -SCRIPT = git-remote-mediawiki +SCRIPT_PERL_FULL=$(patsubst %,$(HERE)/%,$(SCRIPT_PERL)) +INSTLIBDIR=$(shell $(MAKE) -C $(GIT_ROOT_DIR)/perl \ + -s --no-print-directory instlibdir) +DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) +INSTLIBDIR_SQ = $(subst ','\'',$(INSTLIBDIR)) -.PHONY: install help doc test clean +all: build -help: - @echo 'This is the help target of the Makefile. Current configuration:' - @echo ' gitexecdir = $(gitexecdir_SQ)' - @echo ' PERL_PATH = $(PERL_PATH_SQ)' - @echo 'Run "$(MAKE) install" to install $(SCRIPT) in gitexecdir' - @echo 'Run "$(MAKE) test" to run the testsuite' +test: all + $(MAKE) -C t -install: - sed -e '1s|#!.*/perl|#!$(PERL_PATH_SQ)|' $(SCRIPT) \ - > '$(gitexecdir_SQ)/$(SCRIPT)' - chmod +x '$(gitexecdir)/$(SCRIPT)' +check: perlcritic test -doc: - @echo 'Sorry, "make doc" is not implemented yet for $(SCRIPT)' +install_pm: + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(INSTLIBDIR_SQ)/Git' + $(INSTALL) -m 644 $(GIT_MEDIAWIKI_PM) \ + '$(DESTDIR_SQ)$(INSTLIBDIR_SQ)/$(GIT_MEDIAWIKI_PM)' -test: - $(MAKE) -C t/ test +build: + $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL="$(SCRIPT_PERL_FULL)" \ + build-perl-script + +install: install_pm + $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL="$(SCRIPT_PERL_FULL)" \ + install-perl-script clean: - $(RM) '$(gitexecdir)/$(SCRIPT)' - $(MAKE) -C t/ clean + $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL="$(SCRIPT_PERL_FULL)" \ + clean-perl-script + +perlcritic: + perlcritic -5 $(SCRIPT_PERL) + -perlcritic -2 $(SCRIPT_PERL) + +.PHONY: all test check install_pm install clean perlcritic diff --git a/contrib/mw-to-git/bin-wrapper/git b/contrib/mw-to-git/bin-wrapper/git new file mode 100755 index 0000000000..6663ae57e8 --- /dev/null +++ b/contrib/mw-to-git/bin-wrapper/git @@ -0,0 +1,14 @@ +#!/bin/sh + +# git executable wrapper script for Git-Mediawiki to run tests without +# installing all the scripts and perl packages. + +GIT_ROOT_DIR=../../.. +GIT_EXEC_PATH=$(cd "$(dirname "$0")" && cd ${GIT_ROOT_DIR} && pwd) + +GITPERLLIB="$GIT_EXEC_PATH"'/contrib/mw-to-git'"${GITPERLLIB:+:$GITPERLLIB}" +PATH="$GIT_EXEC_PATH"'/contrib/mw-to-git:'"$PATH" + +export GITPERLLIB PATH + +exec "${GIT_EXEC_PATH}/bin-wrappers/git" "$@" diff --git a/contrib/mw-to-git/git-mw.perl b/contrib/mw-to-git/git-mw.perl new file mode 100755 index 0000000000..28df3ee321 --- /dev/null +++ b/contrib/mw-to-git/git-mw.perl @@ -0,0 +1,368 @@ +#!/usr/bin/perl + +# Copyright (C) 2013 +# Benoit Person <benoit.person@ensimag.imag.fr> +# Celestin Matte <celestin.matte@ensimag.imag.fr> +# License: GPL v2 or later + +# Set of tools for git repo with a mediawiki remote. +# Documentation & bugtracker: https://github.com/moy/Git-Mediawiki/ + +use strict; +use warnings; + +use Getopt::Long; +use URI::URL qw(url); +use LWP::UserAgent; +use HTML::TreeBuilder; + +use Git; +use MediaWiki::API; +use Git::Mediawiki qw(clean_filename connect_maybe + EMPTY HTTP_CODE_PAGE_NOT_FOUND); + +# By default, use UTF-8 to communicate with Git and the user +binmode STDERR, ':encoding(UTF-8)'; +binmode STDOUT, ':encoding(UTF-8)'; + +# Global parameters +my $verbose = 0; +sub v_print { + if ($verbose) { + return print {*STDERR} @_; + } + return; +} + +# Preview parameters +my $file_name = EMPTY; +my $remote_name = EMPTY; +my $preview_file_name = EMPTY; +my $autoload = 0; +sub file { + $file_name = shift; + return $file_name; +} + +my %commands = ( + 'help' => + [\&help, {}, \&help], + 'preview' => + [\&preview, { + '<>' => \&file, + 'output|o=s' => \$preview_file_name, + 'remote|r=s' => \$remote_name, + 'autoload|a' => \$autoload + }, \&preview_help] +); + +# Search for sub-command +my $cmd = $commands{'help'}; +for (0..@ARGV-1) { + if (defined $commands{$ARGV[$_]}) { + $cmd = $commands{$ARGV[$_]}; + splice @ARGV, $_, 1; + last; + } +}; +GetOptions( %{$cmd->[1]}, + 'help|h' => \&{$cmd->[2]}, + 'verbose|v' => \$verbose); + +# Launch command +&{$cmd->[0]}; + +############################# Preview Functions ################################ + +sub preview_help { + print {*STDOUT} <<'END'; +USAGE: git mw preview [--remote|-r <remote name>] [--autoload|-a] + [--output|-o <output filename>] [--verbose|-v] + <blob> | <filename> + +DESCRIPTION: +Preview is an utiliy to preview local content of a mediawiki repo as if it was +pushed on the remote. + +For that, preview searches for the remote name of the current branch's +upstream if --remote is not set. If that remote is not found or if it +is not a mediawiki, it lists all mediawiki remotes configured and asks +you to replay your command with the --remote option set properly. + +Then, it searches for a file named 'filename'. If it's not found in +the current dir, it will assume it's a blob. + +The content retrieved in the file (or in the blob) will then be parsed +by the remote mediawiki and combined with a template retrieved from +the mediawiki. + +Finally, preview will save the HTML result in a file. and autoload it +in your default web browser if the option --autoload is present. + +OPTIONS: + -r <remote name>, --remote <remote name> + If the remote is a mediawiki, the template and the parse engine + used for the preview will be those of that remote. + If not, a list of valid remotes will be shown. + + -a, --autoload + Try to load the HTML output in a new tab (or new window) of your + default web browser. + + -o <output filename>, --output <output filename> + Change the HTML output filename. Default filename is based on the + input filename with its extension replaced by '.html'. + + -v, --verbose + Show more information on what's going on under the hood. +END + exit; +} + +sub preview { + my $wiki; + my ($remote_url, $wiki_page_name); + my ($new_content, $template); + my $file_content; + + if ($file_name eq EMPTY) { + die "Missing file argument, see `git mw help`\n"; + } + + v_print("### Selecting remote\n"); + if ($remote_name eq EMPTY) { + $remote_name = find_upstream_remote_name(); + if ($remote_name) { + $remote_url = mediawiki_remote_url_maybe($remote_name); + } + + if (! $remote_url) { + my @valid_remotes = find_mediawiki_remotes(); + + if ($#valid_remotes == 0) { + print {*STDERR} "No mediawiki remote in this repo. \n"; + exit 1; + } else { + my $remotes_list = join("\n\t", @valid_remotes); + print {*STDERR} <<"MESSAGE"; +There are multiple mediawiki remotes, which of: + ${remotes_list} +do you want ? Use the -r option to specify the remote. +MESSAGE + } + + exit 1; + } + } else { + if (!is_valid_remote($remote_name)) { + die "${remote_name} is not a remote\n"; + } + + $remote_url = mediawiki_remote_url_maybe($remote_name); + if (! $remote_url) { + die "${remote_name} is not a mediawiki remote\n"; + } + } + v_print("selected remote:\n\tname: ${remote_name}\n\turl: ${remote_url}\n"); + + $wiki = connect_maybe($wiki, $remote_name, $remote_url); + + # Read file content + if (! -e $file_name) { + $file_content = git_cmd_try { + Git::command('cat-file', 'blob', $file_name); } + "%s failed w/ code %d"; + + if ($file_name =~ /(.+):(.+)/) { + $file_name = $2; + } + } else { + open my $read_fh, "<", $file_name + or die "could not open ${file_name}: $!\n"; + $file_content = do { local $/ = undef; <$read_fh> }; + close $read_fh + or die "unable to close: $!\n"; + } + + v_print("### Retrieving template\n"); + ($wiki_page_name = clean_filename($file_name)) =~ s/\.[^.]+$//; + $template = get_template($remote_url, $wiki_page_name); + + v_print("### Parsing local content\n"); + $new_content = $wiki->api({ + action => 'parse', + text => $file_content, + title => $wiki_page_name + }, { + skip_encoding => 1 + }) or die "No response from remote mediawiki\n"; + $new_content = $new_content->{'parse'}->{'text'}->{'*'}; + + v_print("### Merging contents\n"); + if ($preview_file_name eq EMPTY) { + ($preview_file_name = $file_name) =~ s/\.[^.]+$/.html/; + } + open(my $save_fh, '>:encoding(UTF-8)', $preview_file_name) + or die "Could not open: $!\n"; + print {$save_fh} merge_contents($template, $new_content, $remote_url); + close($save_fh) + or die "Could not close: $!\n"; + + v_print("### Results\n"); + if ($autoload) { + v_print("Launching browser w/ file: ${preview_file_name}"); + system('git', 'web--browse', $preview_file_name); + } else { + print {*STDERR} "Preview file saved as: ${preview_file_name}\n"; + } + + exit; +} + +# uses global scope variable: $remote_name +sub merge_contents { + my $template = shift; + my $content = shift; + my $remote_url = shift; + my ($content_tree, $html_tree, $mw_content_text); + my $template_content_id = 'bodyContent'; + + $html_tree = HTML::TreeBuilder->new; + $html_tree->parse($template); + + $content_tree = HTML::TreeBuilder->new; + $content_tree->parse($content); + + $template_content_id = Git::config("remote.${remote_name}.mwIDcontent") + || $template_content_id; + v_print("Using '${template_content_id}' as the content ID\n"); + + $mw_content_text = $html_tree->look_down('id', $template_content_id); + if (!defined $mw_content_text) { + print {*STDERR} <<"CONFIG"; +Could not combine the new content with the template. You might want to +configure `mediawiki.IDContent` in your config: + git config --add remote.${remote_name}.mwIDcontent <id> +and re-run the command afterward. +CONFIG + exit 1; + } + $mw_content_text->delete_content(); + $mw_content_text->push_content($content_tree); + + make_links_absolute($html_tree, $remote_url); + + return $html_tree->as_HTML; +} + +sub make_links_absolute { + my $html_tree = shift; + my $remote_url = shift; + for (@{ $html_tree->extract_links() }) { + my ($link, $element, $attr) = @{ $_ }; + my $url = url($link)->canonical; + if ($url !~ /#/) { + $element->attr($attr, URI->new_abs($url, $remote_url)); + } + } + return $html_tree; +} + +sub is_valid_remote { + my $remote = shift; + my @remotes = git_cmd_try { + Git::command('remote') } + "%s failed w/ code %d"; + my $found_remote = 0; + foreach my $remote (@remotes) { + if ($remote eq $remote) { + $found_remote = 1; + last; + } + } + return $found_remote; +} + +sub find_mediawiki_remotes { + my @remotes = git_cmd_try { + Git::command('remote'); } + "%s failed w/ code %d"; + my $remote_url; + my @valid_remotes = (); + foreach my $remote (@remotes) { + $remote_url = mediawiki_remote_url_maybe($remote); + if ($remote_url) { + push(@valid_remotes, $remote); + } + } + return @valid_remotes; +} + +sub find_upstream_remote_name { + my $current_branch = git_cmd_try { + Git::command_oneline('symbolic-ref', '--short', 'HEAD') } + "%s failed w/ code %d"; + return Git::config("branch.${current_branch}.remote"); +} + +sub mediawiki_remote_url_maybe { + my $remote = shift; + + # Find remote url + my $remote_url = Git::config("remote.${remote}.url"); + if ($remote_url =~ s/mediawiki::(.*)/$1/) { + return url($remote_url)->canonical; + } + + return; +} + +sub get_template { + my $url = shift; + my $page_name = shift; + my ($req, $res, $code, $url_after); + + $req = LWP::UserAgent->new; + if ($verbose) { + $req->show_progress(1); + } + + $res = $req->get("${url}/index.php?title=${page_name}"); + if (!$res->is_success) { + $code = $res->code; + $url_after = $res->request()->uri(); # resolve all redirections + if ($code == HTTP_CODE_PAGE_NOT_FOUND) { + if ($verbose) { + print {*STDERR} <<"WARNING"; +Warning: Failed to retrieve '$page_name'. Create it on the mediawiki if you want +all the links to work properly. +Trying to use the mediawiki homepage as a fallback template ... +WARNING + } + + # LWP automatically redirects GET request + $res = $req->get("${url}/index.php"); + if (!$res->is_success) { + $url_after = $res->request()->uri(); # resolve all redirections + die "Failed to get homepage @ ${url_after} w/ code ${code}\n"; + } + } else { + die "Failed to get '${page_name}' @ ${url_after} w/ code ${code}\n"; + } + } + + return $res->decoded_content; +} + +############################## Help Functions ################################## + +sub help { + print {*STDOUT} <<'END'; +usage: git mw <command> <args> + +git mw commands are: + help Display help information about git mw + preview Parse and render local file into HTML +END + exit; +} diff --git a/contrib/mw-to-git/git-remote-mediawiki b/contrib/mw-to-git/git-remote-mediawiki.perl index 68555d4265..8dd74a9a40 100755 --- a/contrib/mw-to-git/git-remote-mediawiki +++ b/contrib/mw-to-git/git-remote-mediawiki.perl @@ -13,22 +13,20 @@ use strict; use MediaWiki::API; +use Git; +use Git::Mediawiki qw(clean_filename smudge_filename connect_maybe + EMPTY HTTP_CODE_OK); use DateTime::Format::ISO8601; +use warnings; # By default, use UTF-8 to communicate with Git and the user -binmode STDERR, ":utf8"; -binmode STDOUT, ":utf8"; +binmode STDERR, ':encoding(UTF-8)'; +binmode STDOUT, ':encoding(UTF-8)'; use URI::Escape; -use IPC::Open2; - -use warnings; - -# Mediawiki filenames can contain forward slashes. This variable decides by which pattern they should be replaced -use constant SLASH_REPLACEMENT => "%2F"; # It's not always possible to delete pages (may require some -# priviledges). Deleted pages are replaced with this content. +# privileges). Deleted pages are replaced with this content. use constant DELETED_CONTENT => "[[Category:Deleted]]\n"; # It's not possible to create empty pages. New empty files in Git are @@ -36,45 +34,57 @@ use constant DELETED_CONTENT => "[[Category:Deleted]]\n"; use constant EMPTY_CONTENT => "<!-- empty page -->\n"; # used to reflect file creation or deletion in diff. -use constant NULL_SHA1 => "0000000000000000000000000000000000000000"; +use constant NULL_SHA1 => '0000000000000000000000000000000000000000'; # Used on Git's side to reflect empty edit messages on the wiki use constant EMPTY_MESSAGE => '*Empty MediaWiki Message*'; +# Number of pages taken into account at once in submodule get_mw_page_list +use constant SLICE_SIZE => 50; + +# Number of linked mediafile to get at once in get_linked_mediafiles +# The query is split in small batches because of the MW API limit of +# the number of links to be returned (500 links max). +use constant BATCH_SIZE => 10; + +if (@ARGV != 2) { + exit_error_usage(); +} + my $remotename = $ARGV[0]; my $url = $ARGV[1]; # Accept both space-separated and multiple keys in config file. # Spaces should be written as _ anyway because we'll use chomp. -my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote.". $remotename .".pages")); +my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote.${remotename}.pages")); chomp(@tracked_pages); # Just like @tracked_pages, but for MediaWiki categories. -my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote.". $remotename .".categories")); +my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote.${remotename}.categories")); chomp(@tracked_categories); # Import media files on pull -my $import_media = run_git("config --get --bool remote.". $remotename .".mediaimport"); +my $import_media = run_git("config --get --bool remote.${remotename}.mediaimport"); chomp($import_media); -$import_media = ($import_media eq "true"); +$import_media = ($import_media eq 'true'); # Export media files on push -my $export_media = run_git("config --get --bool remote.". $remotename .".mediaexport"); +my $export_media = run_git("config --get --bool remote.${remotename}.mediaexport"); chomp($export_media); -$export_media = !($export_media eq "false"); +$export_media = !($export_media eq 'false'); -my $wiki_login = run_git("config --get remote.". $remotename .".mwLogin"); +my $wiki_login = run_git("config --get remote.${remotename}.mwLogin"); # Note: mwPassword is discourraged. Use the credential system instead. -my $wiki_passwd = run_git("config --get remote.". $remotename .".mwPassword"); -my $wiki_domain = run_git("config --get remote.". $remotename .".mwDomain"); +my $wiki_passwd = run_git("config --get remote.${remotename}.mwPassword"); +my $wiki_domain = run_git("config --get remote.${remotename}.mwDomain"); chomp($wiki_login); chomp($wiki_passwd); chomp($wiki_domain); # Import only last revisions (both for clone and fetch) -my $shallow_import = run_git("config --get --bool remote.". $remotename .".shallow"); +my $shallow_import = run_git("config --get --bool remote.${remotename}.shallow"); chomp($shallow_import); -$shallow_import = ($shallow_import eq "true"); +$shallow_import = ($shallow_import eq 'true'); # Fetch (clone and pull) by revisions instead of by pages. This behavior # is more efficient when we have a wiki with lots of pages and we fetch @@ -82,15 +92,18 @@ $shallow_import = ($shallow_import eq "true"); # Possible values: # - by_rev: perform one query per new revision on the remote wiki # - by_page: query each tracked page for new revision -my $fetch_strategy = run_git("config --get remote.$remotename.fetchStrategy"); -unless ($fetch_strategy) { - $fetch_strategy = run_git("config --get mediawiki.fetchStrategy"); +my $fetch_strategy = run_git("config --get remote.${remotename}.fetchStrategy"); +if (!$fetch_strategy) { + $fetch_strategy = run_git('config --get mediawiki.fetchStrategy'); } chomp($fetch_strategy); -unless ($fetch_strategy) { - $fetch_strategy = "by_page"; +if (!$fetch_strategy) { + $fetch_strategy = 'by_page'; } +# Remember the timestamp corresponding to a revision id. +my %basetimestamps; + # Dumb push: don't update notes and mediawiki ref to reflect the last push. # # Configurable with mediawiki.dumbPush, or per-remote with @@ -105,48 +118,25 @@ unless ($fetch_strategy) { # will get the history with information lost). If the import is # deterministic, this means everybody gets the same sha1 for each # MediaWiki revision. -my $dumb_push = run_git("config --get --bool remote.$remotename.dumbPush"); -unless ($dumb_push) { - $dumb_push = run_git("config --get --bool mediawiki.dumbPush"); +my $dumb_push = run_git("config --get --bool remote.${remotename}.dumbPush"); +if (!$dumb_push) { + $dumb_push = run_git('config --get --bool mediawiki.dumbPush'); } chomp($dumb_push); -$dumb_push = ($dumb_push eq "true"); +$dumb_push = ($dumb_push eq 'true'); my $wiki_name = $url; -$wiki_name =~ s/[^\/]*:\/\///; +$wiki_name =~ s{[^/]*://}{}; # If URL is like http://user:password@example.com/, we clearly don't # want the password in $wiki_name. While we're there, also remove user # and '@' sign, to avoid author like MWUser@HTTPUser@host.com $wiki_name =~ s/^.*@//; # Commands parser -my $entry; -my @cmd; while (<STDIN>) { chomp; - @cmd = split(/ /); - if (defined($cmd[0])) { - # Line not blank - if ($cmd[0] eq "capabilities") { - die("Too many arguments for capabilities") unless (!defined($cmd[1])); - mw_capabilities(); - } elsif ($cmd[0] eq "list") { - die("Too many arguments for list") unless (!defined($cmd[2])); - mw_list($cmd[1]); - } elsif ($cmd[0] eq "import") { - die("Invalid arguments for import") unless ($cmd[1] ne "" && !defined($cmd[2])); - mw_import($cmd[1]); - } elsif ($cmd[0] eq "option") { - die("Too many arguments for option") unless ($cmd[1] ne "" && $cmd[2] ne "" && !defined($cmd[3])); - mw_option($cmd[1],$cmd[2]); - } elsif ($cmd[0] eq "push") { - mw_push($cmd[1]); - } else { - print STDERR "Unknown command. Aborting...\n"; - last; - } - } else { - # blank line: we should terminate + + if (!parse_command($_)) { last; } @@ -156,107 +146,91 @@ while (<STDIN>) { ########################## Functions ############################## -## credential API management (generic functions) - -sub credential_read { - my %credential; - my $reader = shift; - my $op = shift; - while (<$reader>) { - my ($key, $value) = /([^=]*)=(.*)/; - if (not defined $key) { - die "ERROR receiving response from git credential $op:\n$_\n"; - } - $credential{$key} = $value; - } - return %credential; +## error handling +sub exit_error_usage { + die "ERROR: git-remote-mediawiki module was not called with a correct number of\n" . + "parameters\n" . + "You may obtain this error because you attempted to run the git-remote-mediawiki\n" . + "module directly.\n" . + "This module can be used the following way:\n" . + "\tgit clone mediawiki://<address of a mediawiki>\n" . + "Then, use git commit, push and pull as with every normal git repository.\n"; } -sub credential_write { - my $credential = shift; - my $writer = shift; - # url overwrites other fields, so it must come first - print $writer "url=$credential->{url}\n" if exists $credential->{url}; - while (my ($key, $value) = each(%$credential) ) { - if (length $value && $key ne 'url') { - print $writer "$key=$value\n"; - } +sub parse_command { + my ($line) = @_; + my @cmd = split(/ /, $line); + if (!defined $cmd[0]) { + return 0; } -} - -sub credential_run { - my $op = shift; - my $credential = shift; - my $pid = open2(my $reader, my $writer, "git credential $op"); - credential_write($credential, $writer); - print $writer "\n"; - close($writer); - - if ($op eq "fill") { - %$credential = credential_read($reader, $op); + if ($cmd[0] eq 'capabilities') { + die("Too many arguments for capabilities\n") + if (defined($cmd[1])); + mw_capabilities(); + } elsif ($cmd[0] eq 'list') { + die("Too many arguments for list\n") if (defined($cmd[2])); + mw_list($cmd[1]); + } elsif ($cmd[0] eq 'import') { + die("Invalid argument for import\n") + if ($cmd[1] eq EMPTY); + die("Too many arguments for import\n") + if (defined($cmd[2])); + mw_import($cmd[1]); + } elsif ($cmd[0] eq 'option') { + die("Invalid arguments for option\n") + if ($cmd[1] eq EMPTY || $cmd[2] eq EMPTY); + die("Too many arguments for option\n") + if (defined($cmd[3])); + mw_option($cmd[1],$cmd[2]); + } elsif ($cmd[0] eq 'push') { + mw_push($cmd[1]); } else { - if (<$reader>) { - die "ERROR while running git credential $op:\n$_"; - } - } - close($reader); - waitpid($pid, 0); - my $child_exit_status = $? >> 8; - if ($child_exit_status != 0) { - die "'git credential $op' failed with code $child_exit_status."; + print {*STDERR} "Unknown command. Aborting...\n"; + return 0; } + return 1; } # MediaWiki API instance, created lazily. my $mediawiki; -sub mw_connect_maybe { - if ($mediawiki) { - return; - } - $mediawiki = MediaWiki::API->new; - $mediawiki->{config}->{api_url} = "$url/api.php"; - if ($wiki_login) { - my %credential = (url => $url); - $credential{username} = $wiki_login; - $credential{password} = $wiki_passwd; - credential_run("fill", \%credential); - my $request = {lgname => $credential{username}, - lgpassword => $credential{password}, - lgdomain => $wiki_domain}; - if ($mediawiki->login($request)) { - credential_run("approve", \%credential); - print STDERR "Logged in mediawiki user \"$credential{username}\".\n"; - } else { - print STDERR "Failed to log in mediawiki user \"$credential{username}\" on $url\n"; - print STDERR " (error " . - $mediawiki->{error}->{code} . ': ' . - $mediawiki->{error}->{details} . ")\n"; - credential_run("reject", \%credential); - exit 1; - } +sub fatal_mw_error { + my $action = shift; + print STDERR "fatal: could not $action.\n"; + print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; + if ($url =~ /^https/) { + print STDERR "fatal: make sure '$url/api.php' is a valid page\n"; + print STDERR "fatal: and the SSL certificate is correct.\n"; + } else { + print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; } + print STDERR "fatal: (error " . + $mediawiki->{error}->{code} . ': ' . + $mediawiki->{error}->{details} . ")\n"; + exit 1; } ## Functions for listing pages on the remote wiki sub get_mw_tracked_pages { my $pages = shift; get_mw_page_list(\@tracked_pages, $pages); + return; } sub get_mw_page_list { my $page_list = shift; my $pages = shift; - my @some_pages = @$page_list; + my @some_pages = @{$page_list}; while (@some_pages) { - my $last = 50; - if ($#some_pages < $last) { - $last = $#some_pages; + my $last_page = SLICE_SIZE; + if ($#some_pages < $last_page) { + $last_page = $#some_pages; } - my @slice = @some_pages[0..$last]; + my @slice = @some_pages[0..$last_page]; get_mw_first_pages(\@slice, $pages); - @some_pages = @some_pages[51..$#some_pages]; + @some_pages = @some_pages[(SLICE_SIZE + 1)..$#some_pages]; } + return; } sub get_mw_tracked_categories { @@ -266,7 +240,7 @@ sub get_mw_tracked_categories { # Mediawiki requires the Category # prefix, but let's not force the user # to specify it. - $category = "Category:" . $category; + $category = "Category:${category}"; } my $mw_pages = $mediawiki->list( { action => 'query', @@ -274,11 +248,12 @@ sub get_mw_tracked_categories { cmtitle => $category, cmlimit => 'max' } ) || die $mediawiki->{error}->{code} . ': ' - . $mediawiki->{error}->{details}; + . $mediawiki->{error}->{details} . "\n"; foreach my $page (@{$mw_pages}) { $pages->{$page->{title}} = $page; } } + return; } sub get_mw_all_pages { @@ -290,14 +265,12 @@ sub get_mw_all_pages { aplimit => 'max' }); if (!defined($mw_pages)) { - print STDERR "fatal: could not get the list of wiki pages.\n"; - print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; - print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; - exit 1; + fatal_mw_error("get the list of wiki pages"); } foreach my $page (@{$mw_pages}) { $pages->{$page->{title}} = $page; } + return; } # queries the wiki for a set of pages. Meant to be used within a loop @@ -316,25 +289,23 @@ sub get_mw_first_pages { titles => $titles, }); if (!defined($mw_pages)) { - print STDERR "fatal: could not query the list of wiki pages.\n"; - print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; - print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; - exit 1; + fatal_mw_error("query the list of wiki pages"); } while (my ($id, $page) = each(%{$mw_pages->{query}->{pages}})) { if ($id < 0) { - print STDERR "Warning: page $page->{title} not found on wiki\n"; + print {*STDERR} "Warning: page $page->{title} not found on wiki\n"; } else { $pages->{$page->{title}} = $page; } } + return; } # Get the list of pages to be fetched according to configuration. sub get_mw_pages { - mw_connect_maybe(); + $mediawiki = connect_maybe($mediawiki, $remotename, $url); - print STDERR "Listing pages on remote wiki...\n"; + print {*STDERR} "Listing pages on remote wiki...\n"; my %pages; # hash on page titles to avoid duplicates my $user_defined; @@ -352,14 +323,14 @@ sub get_mw_pages { get_mw_all_pages(\%pages); } if ($import_media) { - print STDERR "Getting media files for selected pages...\n"; + print {*STDERR} "Getting media files for selected pages...\n"; if ($user_defined) { get_linked_mediafiles(\%pages); } else { get_all_mediafiles(\%pages); } } - print STDERR (scalar keys %pages) . " pages found.\n"; + print {*STDERR} (scalar keys %pages) . " pages found.\n"; return %pages; } @@ -367,9 +338,13 @@ sub get_mw_pages { # $out = run_git("command args", "raw"); # don't interpret output as UTF-8. sub run_git { my $args = shift; - my $encoding = (shift || "encoding(UTF-8)"); - open(my $git, "-|:$encoding", "git " . $args); - my $res = do { local $/; <$git> }; + my $encoding = (shift || 'encoding(UTF-8)'); + open(my $git, "-|:${encoding}", "git ${args}") + or die "Unable to fork: $!\n"; + my $res = do { + local $/ = undef; + <$git> + }; close($git); return $res; @@ -384,27 +359,26 @@ sub get_all_mediafiles { my $mw_pages = $mediawiki->list({ action => 'query', list => 'allpages', - apnamespace => get_mw_namespace_id("File"), + apnamespace => get_mw_namespace_id('File'), aplimit => 'max' }); if (!defined($mw_pages)) { - print STDERR "fatal: could not get the list of pages for media files.\n"; - print STDERR "fatal: '$url' does not appear to be a mediawiki\n"; - print STDERR "fatal: make sure '$url/api.php' is a valid page.\n"; + print {*STDERR} "fatal: could not get the list of pages for media files.\n"; + print {*STDERR} "fatal: '$url' does not appear to be a mediawiki\n"; + print {*STDERR} "fatal: make sure '$url/api.php' is a valid page.\n"; exit 1; } foreach my $page (@{$mw_pages}) { $pages->{$page->{title}} = $page; } + return; } sub get_linked_mediafiles { my $pages = shift; - my @titles = map $_->{title}, values(%{$pages}); + my @titles = map { $_->{title} } values(%{$pages}); - # The query is split in small batches because of the MW API limit of - # the number of links to be returned (500 links max). - my $batch = 10; + my $batch = BATCH_SIZE; while (@titles) { if ($#titles < $batch) { $batch = $#titles; @@ -420,7 +394,7 @@ sub get_linked_mediafiles { action => 'query', prop => 'links|images', titles => $mw_titles, - plnamespace => get_mw_namespace_id("File"), + plnamespace => get_mw_namespace_id('File'), pllimit => 'max' }; my $result = $mediawiki->api($query); @@ -428,11 +402,13 @@ sub get_linked_mediafiles { while (my ($id, $page) = each(%{$result->{query}->{pages}})) { my @media_titles; if (defined($page->{links})) { - my @link_titles = map $_->{title}, @{$page->{links}}; + my @link_titles + = map { $_->{title} } @{$page->{links}}; push(@media_titles, @link_titles); } if (defined($page->{images})) { - my @image_titles = map $_->{title}, @{$page->{images}}; + my @image_titles + = map { $_->{title} } @{$page->{images}}; push(@media_titles, @image_titles); } if (@media_titles) { @@ -442,6 +418,7 @@ sub get_linked_mediafiles { @titles = @titles[($batch+1)..$#titles]; } + return; } sub get_mw_mediafile_for_page_revision { @@ -455,7 +432,7 @@ sub get_mw_mediafile_for_page_revision { my $query = { action => 'query', prop => 'imageinfo', - titles => "File:" . $filename, + titles => "File:${filename}", iistart => $timestamp, iiend => $timestamp, iiprop => 'timestamp|archivename|url', @@ -473,53 +450,55 @@ sub get_mw_mediafile_for_page_revision { $mediafile{timestamp} = $fileinfo->{timestamp}; # Mediawiki::API's download function doesn't support https URLs # and can't download old versions of files. - print STDERR "\tDownloading file $mediafile{title}, version $mediafile{timestamp}\n"; + print {*STDERR} "\tDownloading file $mediafile{title}, version $mediafile{timestamp}\n"; $mediafile{content} = download_mw_mediafile($fileinfo->{url}); } return %mediafile; } sub download_mw_mediafile { - my $url = shift; - - my $response = $mediawiki->{ua}->get($url); - if ($response->code == 200) { - return $response->decoded_content; + my $download_url = shift; + + my $response = $mediawiki->{ua}->get($download_url); + if ($response->code == HTTP_CODE_OK) { + # It is tempting to return + # $response->decoded_content({charset => "none"}), but + # when doing so, utf8::downgrade($content) fails with + # "Wide character in subroutine entry". + $response->decode(); + return $response->content(); } else { - print STDERR "Error downloading mediafile from :\n"; - print STDERR "URL: $url\n"; - print STDERR "Server response: " . $response->code . " " . $response->message . "\n"; + print {*STDERR} "Error downloading mediafile from :\n"; + print {*STDERR} "URL: ${download_url}\n"; + print {*STDERR} 'Server response: ' . $response->code . q{ } . $response->message . "\n"; exit 1; } } sub get_last_local_revision { # Get note regarding last mediawiki revision - my $note = run_git("notes --ref=$remotename/mediawiki show refs/mediawiki/$remotename/master 2>/dev/null"); + my $note = run_git("notes --ref=${remotename}/mediawiki show refs/mediawiki/${remotename}/master 2>/dev/null"); my @note_info = split(/ /, $note); my $lastrevision_number; - if (!(defined($note_info[0]) && $note_info[0] eq "mediawiki_revision:")) { - print STDERR "No previous mediawiki revision found"; + if (!(defined($note_info[0]) && $note_info[0] eq 'mediawiki_revision:')) { + print {*STDERR} 'No previous mediawiki revision found'; $lastrevision_number = 0; } else { # Notes are formatted : mediawiki_revision: #number $lastrevision_number = $note_info[1]; chomp($lastrevision_number); - print STDERR "Last local mediawiki revision found is $lastrevision_number"; + print {*STDERR} "Last local mediawiki revision found is ${lastrevision_number}"; } return $lastrevision_number; } -# Remember the timestamp corresponding to a revision id. -my %basetimestamps; - # Get the last remote revision without taking in account which pages are # tracked or not. This function makes a single request to the wiki thus # avoid a loop onto all tracked pages. This is useful for the fetch-by-rev # option. sub get_last_global_remote_rev { - mw_connect_maybe(); + $mediawiki = connect_maybe($mediawiki, $remotename, $url); my $query = { action => 'query', @@ -535,14 +514,14 @@ sub get_last_global_remote_rev { # Get the last remote revision concerning the tracked pages and the tracked # categories. sub get_last_remote_revision { - mw_connect_maybe(); + $mediawiki = connect_maybe($mediawiki, $remotename, $url); my %pages_hash = get_mw_pages(); my @pages = values(%pages_hash); my $max_rev_num = 0; - print STDERR "Getting last revision id on tracked pages...\n"; + print {*STDERR} "Getting last revision id on tracked pages...\n"; foreach my $page (@pages) { my $id = $page->{pageid}; @@ -563,7 +542,7 @@ sub get_last_remote_revision { $max_rev_num = ($lastrev->{revid} > $max_rev_num ? $lastrev->{revid} : $max_rev_num); } - print STDERR "Last remote revision found is $max_rev_num.\n"; + print {*STDERR} "Last remote revision found is $max_rev_num.\n"; return $max_rev_num; } @@ -574,7 +553,7 @@ sub mediawiki_clean { # Mediawiki does not allow blank space at the end of a page and ends with a single \n. # This function right trims a string and adds a \n at the end to follow this rule $string =~ s/\s+$//; - if ($string eq "" && $page_created) { + if ($string eq EMPTY && $page_created) { # Creating empty pages is forbidden. $string = EMPTY_CONTENT; } @@ -585,38 +564,16 @@ sub mediawiki_clean { sub mediawiki_smudge { my $string = shift; if ($string eq EMPTY_CONTENT) { - $string = ""; + $string = EMPTY; } # This \n is important. This is due to mediawiki's way to handle end of files. - return $string."\n"; -} - -sub mediawiki_clean_filename { - my $filename = shift; - $filename =~ s/@{[SLASH_REPLACEMENT]}/\//g; - # [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded. - # Do a variant of URL-encoding, i.e. looks like URL-encoding, - # but with _ added to prevent MediaWiki from thinking this is - # an actual special character. - $filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge; - # If we use the uri escape before - # we should unescape here, before anything - - return $filename; -} - -sub mediawiki_smudge_filename { - my $filename = shift; - $filename =~ s/\//@{[SLASH_REPLACEMENT]}/g; - $filename =~ s/ /_/g; - # Decode forbidden characters encoded in mediawiki_clean_filename - $filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf("%c", hex($1))/ge; - return $filename; + return "${string}\n"; } sub literal_data { my ($content) = @_; - print STDOUT "data ", bytes::length($content), "\n", $content; + print {*STDOUT} 'data ', bytes::length($content), "\n", $content; + return; } sub literal_data_raw { @@ -624,33 +581,40 @@ sub literal_data_raw { my ($content) = @_; # Avoid confusion between size in bytes and in characters utf8::downgrade($content); - binmode STDOUT, ":raw"; - print STDOUT "data ", bytes::length($content), "\n", $content; - binmode STDOUT, ":utf8"; + binmode STDOUT, ':raw'; + print {*STDOUT} 'data ', bytes::length($content), "\n", $content; + binmode STDOUT, ':encoding(UTF-8)'; + return; } sub mw_capabilities { # Revisions are imported to the private namespace # refs/mediawiki/$remotename/ by the helper and fetched into # refs/remotes/$remotename later by fetch. - print STDOUT "refspec refs/heads/*:refs/mediawiki/$remotename/*\n"; - print STDOUT "import\n"; - print STDOUT "list\n"; - print STDOUT "push\n"; - print STDOUT "\n"; + print {*STDOUT} "refspec refs/heads/*:refs/mediawiki/${remotename}/*\n"; + print {*STDOUT} "import\n"; + print {*STDOUT} "list\n"; + print {*STDOUT} "push\n"; + if ($dumb_push) { + print {*STDOUT} "no-private-update\n"; + } + print {*STDOUT} "\n"; + return; } sub mw_list { # MediaWiki do not have branches, we consider one branch arbitrarily # called master, and HEAD pointing to it. - print STDOUT "? refs/heads/master\n"; - print STDOUT "\@refs/heads/master HEAD\n"; - print STDOUT "\n"; + print {*STDOUT} "? refs/heads/master\n"; + print {*STDOUT} "\@refs/heads/master HEAD\n"; + print {*STDOUT} "\n"; + return; } sub mw_option { - print STDERR "remote-helper command 'option $_[0]' not yet implemented\n"; - print STDOUT "unsupported\n"; + print {*STDERR} "remote-helper command 'option $_[0]' not yet implemented\n"; + print {*STDOUT} "unsupported\n"; + return; } sub fetch_mw_revisions_for_page { @@ -666,6 +630,9 @@ sub fetch_mw_revisions_for_page { rvstartid => $fetch_from, rvlimit => 500, pageids => $id, + + # Let MediaWiki know that we support the latest API. + continue => '', }; my $revnum = 0; @@ -681,15 +648,22 @@ sub fetch_mw_revisions_for_page { push(@page_revs, $page_rev_ids); $revnum++; } - last unless $result->{'query-continue'}; - $query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid}; + + if ($result->{'query-continue'}) { # For legacy APIs + $query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid}; + } elsif ($result->{continue}) { # For newer APIs + $query->{rvstartid} = $result->{continue}->{rvcontinue}; + $query->{continue} = $result->{continue}->{continue}; + } else { + last; + } } if ($shallow_import && @page_revs) { - print STDERR " Found 1 revision (shallow import).\n"; + print {*STDERR} " Found 1 revision (shallow import).\n"; @page_revs = sort {$b->{revid} <=> $a->{revid}} (@page_revs); return $page_revs[0]; } - print STDERR " Found ", $revnum, " revision(s).\n"; + print {*STDERR} " Found ${revnum} revision(s).\n"; return @page_revs; } @@ -701,8 +675,7 @@ sub fetch_mw_revisions { my $n = 1; foreach my $page (@pages) { my $id = $page->{pageid}; - - print STDERR "page $n/", scalar(@pages), ": ". $page->{title} ."\n"; + print {*STDERR} "page ${n}/", scalar(@pages), ': ', $page->{title}, "\n"; $n++; my @page_revs = fetch_mw_revisions_for_page($page, $id, $fetch_from); @revisions = (@page_revs, @revisions); @@ -711,6 +684,14 @@ sub fetch_mw_revisions { return ($n, @revisions); } +sub fe_escape_path { + my $path = shift; + $path =~ s/\\/\\\\/g; + $path =~ s/"/\\"/g; + $path =~ s/\n/\\n/g; + return qq("${path}"); +} + sub import_file_revision { my $commit = shift; my %commit = %{$commit}; @@ -728,40 +709,43 @@ sub import_file_revision { my $author = $commit{author}; my $date = $commit{date}; - print STDOUT "commit refs/mediawiki/$remotename/master\n"; - print STDOUT "mark :$n\n"; - print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n"; + print {*STDOUT} "commit refs/mediawiki/${remotename}/master\n"; + print {*STDOUT} "mark :${n}\n"; + print {*STDOUT} "committer ${author} <${author}\@${wiki_name}> " . $date->epoch . " +0000\n"; literal_data($comment); # If it's not a clone, we need to know where to start from if (!$full_import && $n == 1) { - print STDOUT "from refs/mediawiki/$remotename/master^0\n"; + print {*STDOUT} "from refs/mediawiki/${remotename}/master^0\n"; } if ($content ne DELETED_CONTENT) { - print STDOUT "M 644 inline $title.mw\n"; + print {*STDOUT} 'M 644 inline ' . + fe_escape_path("${title}.mw") . "\n"; literal_data($content); if (%mediafile) { - print STDOUT "M 644 inline $mediafile{title}\n"; + print {*STDOUT} 'M 644 inline ' + . fe_escape_path($mediafile{title}) . "\n"; literal_data_raw($mediafile{content}); } - print STDOUT "\n\n"; + print {*STDOUT} "\n\n"; } else { - print STDOUT "D $title.mw\n"; + print {*STDOUT} 'D ' . fe_escape_path("${title}.mw") . "\n"; } # mediawiki revision number in the git note if ($full_import && $n == 1) { - print STDOUT "reset refs/notes/$remotename/mediawiki\n"; + print {*STDOUT} "reset refs/notes/${remotename}/mediawiki\n"; } - print STDOUT "commit refs/notes/$remotename/mediawiki\n"; - print STDOUT "committer $author <$author\@$wiki_name> ", $date->epoch, " +0000\n"; - literal_data("Note added by git-mediawiki during import"); + print {*STDOUT} "commit refs/notes/${remotename}/mediawiki\n"; + print {*STDOUT} "committer ${author} <${author}\@${wiki_name}> " . $date->epoch . " +0000\n"; + literal_data('Note added by git-mediawiki during import'); if (!$full_import && $n == 1) { - print STDOUT "from refs/notes/$remotename/mediawiki^0\n"; + print {*STDOUT} "from refs/notes/${remotename}/mediawiki^0\n"; } - print STDOUT "N inline :$n\n"; - literal_data("mediawiki_revision: " . $commit{mw_revision}); - print STDOUT "\n\n"; + print {*STDOUT} "N inline :${n}\n"; + literal_data("mediawiki_revision: $commit{mw_revision}"); + print {*STDOUT} "\n\n"; + return; } # parse a sequence of @@ -774,23 +758,25 @@ sub get_more_refs { my @refs; while (1) { my $line = <STDIN>; - if ($line =~ m/^$cmd (.*)$/) { + if ($line =~ /^$cmd (.*)$/) { push(@refs, $1); } elsif ($line eq "\n") { return @refs; } else { - die("Invalid command in a '$cmd' batch: ". $_); + die("Invalid command in a '$cmd' batch: $_\n"); } } + return; } sub mw_import { # multiple import commands can follow each other. - my @refs = (shift, get_more_refs("import")); + my @refs = (shift, get_more_refs('import')); foreach my $ref (@refs) { mw_import_ref($ref); } - print STDOUT "done\n"; + print {*STDOUT} "done\n"; + return; } sub mw_import_ref { @@ -800,40 +786,41 @@ sub mw_import_ref { # Since HEAD is a symbolic ref to master (by convention, # followed by the output of the command "list" that we gave), # we don't need to do anything in this case. - if ($ref eq "HEAD") { + if ($ref eq 'HEAD') { return; } - mw_connect_maybe(); + $mediawiki = connect_maybe($mediawiki, $remotename, $url); - print STDERR "Searching revisions...\n"; + print {*STDERR} "Searching revisions...\n"; my $last_local = get_last_local_revision(); my $fetch_from = $last_local + 1; if ($fetch_from == 1) { - print STDERR ", fetching from beginning.\n"; + print {*STDERR} ", fetching from beginning.\n"; } else { - print STDERR ", fetching from here.\n"; + print {*STDERR} ", fetching from here.\n"; } my $n = 0; - if ($fetch_strategy eq "by_rev") { - print STDERR "Fetching & writing export data by revs...\n"; + if ($fetch_strategy eq 'by_rev') { + print {*STDERR} "Fetching & writing export data by revs...\n"; $n = mw_import_ref_by_revs($fetch_from); - } elsif ($fetch_strategy eq "by_page") { - print STDERR "Fetching & writing export data by pages...\n"; + } elsif ($fetch_strategy eq 'by_page') { + print {*STDERR} "Fetching & writing export data by pages...\n"; $n = mw_import_ref_by_pages($fetch_from); } else { - print STDERR "fatal: invalid fetch strategy \"$fetch_strategy\".\n"; - print STDERR "Check your configuration variables remote.$remotename.fetchStrategy and mediawiki.fetchStrategy\n"; + print {*STDERR} qq(fatal: invalid fetch strategy "${fetch_strategy}".\n); + print {*STDERR} "Check your configuration variables remote.${remotename}.fetchStrategy and mediawiki.fetchStrategy\n"; exit 1; } if ($fetch_from == 1 && $n == 0) { - print STDERR "You appear to have cloned an empty MediaWiki.\n"; + print {*STDERR} "You appear to have cloned an empty MediaWiki.\n"; # Something has to be done remote-helper side. If nothing is done, an error is - # thrown saying that HEAD is refering to unknown object 0000000000000000000 + # thrown saying that HEAD is referring to unknown object 0000000000000000000 # and the clone fails. } + return; } sub mw_import_ref_by_pages { @@ -845,7 +832,7 @@ sub mw_import_ref_by_pages { my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from); @revisions = sort {$a->{revid} <=> $b->{revid}} @revisions; - my @revision_ids = map $_->{revid}, @revisions; + my @revision_ids = map { $_->{revid} } @revisions; return mw_import_revids($fetch_from, \@revision_ids, \%pages_hash); } @@ -872,7 +859,7 @@ sub mw_import_revids { my $n_actual = 0; my $last_timestamp = 0; # Placeholer in case $rev->timestamp is undefined - foreach my $pagerevid (@$revision_ids) { + foreach my $pagerevid (@{$revision_ids}) { # Count page even if we skip it, since we display # $n/$total and $total includes skipped pages. $n++; @@ -888,7 +875,7 @@ sub mw_import_revids { my $result = $mediawiki->api($query); if (!$result) { - die "Failed to retrieve modified page for revision $pagerevid"; + die "Failed to retrieve modified page for revision $pagerevid\n"; } if (defined($result->{query}->{badrevids}->{$pagerevid})) { @@ -897,7 +884,7 @@ sub mw_import_revids { } if (!defined($result->{query}->{pages})) { - die "Invalid revision $pagerevid."; + die "Invalid revision ${pagerevid}.\n"; } my @result_pages = values(%{$result->{query}->{pages}}); @@ -907,8 +894,8 @@ sub mw_import_revids { my $page_title = $result_page->{title}; if (!exists($pages->{$page_title})) { - print STDERR "$n/", scalar(@$revision_ids), - ": Skipping revision #$rev->{revid} of $page_title\n"; + print {*STDERR} "${n}/", scalar(@{$revision_ids}), + ": Skipping revision #$rev->{revid} of ${page_title}\n"; next; } @@ -917,7 +904,7 @@ sub mw_import_revids { my %commit; $commit{author} = $rev->{user} || 'Anonymous'; $commit{comment} = $rev->{comment} || EMPTY_MESSAGE; - $commit{title} = mediawiki_smudge_filename($page_title); + $commit{title} = smudge_filename($page_title); $commit{mw_revision} = $rev->{revid}; $commit{content} = mediawiki_smudge($rev->{'*'}); @@ -933,14 +920,14 @@ sub mw_import_revids { my %mediafile; if ($namespace) { my $id = get_mw_namespace_id($namespace); - if ($id && $id == get_mw_namespace_id("File")) { + if ($id && $id == get_mw_namespace_id('File')) { %mediafile = get_mw_mediafile_for_page_revision($filename, $rev->{timestamp}); } } # If this is a revision of the media page for new version # of a file do one common commit for both file and media page. # Else do commit only for that page. - print STDERR "$n/", scalar(@$revision_ids), ": Revision #$rev->{revid} of $commit{title}\n"; + print {*STDERR} "${n}/", scalar(@{$revision_ids}), ": Revision #$rev->{revid} of $commit{title}\n"; import_file_revision(\%commit, ($fetch_from == 1), $n_actual, \%mediafile); } @@ -948,17 +935,17 @@ sub mw_import_revids { } sub error_non_fast_forward { - my $advice = run_git("config --bool advice.pushNonFastForward"); + my $advice = run_git('config --bool advice.pushNonFastForward'); chomp($advice); - if ($advice ne "false") { + if ($advice ne 'false') { # Native git-push would show this after the summary. # We can't ask it to display it cleanly, so print it # ourselves before. - print STDERR "To prevent you from losing history, non-fast-forward updates were rejected\n"; - print STDERR "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n"; - print STDERR "'Note about fast-forwards' section of 'git push --help' for details.\n"; + print {*STDERR} "To prevent you from losing history, non-fast-forward updates were rejected\n"; + print {*STDERR} "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n"; + print {*STDERR} "'Note about fast-forwards' section of 'git push --help' for details.\n"; } - print STDOUT "error $_[0] \"non-fast-forward\"\n"; + print {*STDOUT} qq(error $_[0] "non-fast-forward"\n); return 0; } @@ -969,34 +956,34 @@ sub mw_upload_file { my $file_deleted = shift; my $summary = shift; my $newrevid; - my $path = "File:" . $complete_file_name; + my $path = "File:${complete_file_name}"; my %hashFiles = get_allowed_file_extensions(); if (!exists($hashFiles{$extension})) { - print STDERR "$complete_file_name is not a permitted file on this wiki.\n"; - print STDERR "Check the configuration of file uploads in your mediawiki.\n"; + print {*STDERR} "${complete_file_name} is not a permitted file on this wiki.\n"; + print {*STDERR} "Check the configuration of file uploads in your mediawiki.\n"; return $newrevid; } # Deleting and uploading a file requires a priviledged user if ($file_deleted) { - mw_connect_maybe(); + $mediawiki = connect_maybe($mediawiki, $remotename, $url); my $query = { action => 'delete', title => $path, reason => $summary }; if (!$mediawiki->edit($query)) { - print STDERR "Failed to delete file on remote wiki\n"; - print STDERR "Check your permissions on the remote site. Error code:\n"; - print STDERR $mediawiki->{error}->{code} . ':' . $mediawiki->{error}->{details}; + print {*STDERR} "Failed to delete file on remote wiki\n"; + print {*STDERR} "Check your permissions on the remote site. Error code:\n"; + print {*STDERR} $mediawiki->{error}->{code} . ':' . $mediawiki->{error}->{details}; exit 1; } } else { # Don't let perl try to interpret file content as UTF-8 => use "raw" - my $content = run_git("cat-file blob $new_sha1", "raw"); - if ($content ne "") { - mw_connect_maybe(); + my $content = run_git("cat-file blob ${new_sha1}", 'raw'); + if ($content ne EMPTY) { + $mediawiki = connect_maybe($mediawiki, $remotename, $url); $mediawiki->{config}->{upload_url} = - "$url/index.php/Special:Upload"; + "${url}/index.php/Special:Upload"; $mediawiki->edit({ action => 'upload', filename => $complete_file_name, @@ -1008,12 +995,12 @@ sub mw_upload_file { }, { skip_encoding => 1 } ) || die $mediawiki->{error}->{code} . ':' - . $mediawiki->{error}->{details}; + . $mediawiki->{error}->{details} . "\n"; my $last_file_page = $mediawiki->get_page({title => $path}); $newrevid = $last_file_page->{revid}; - print STDERR "Pushed file: $new_sha1 - $complete_file_name.\n"; + print {*STDERR} "Pushed file: ${new_sha1} - ${complete_file_name}.\n"; } else { - print STDERR "Empty file $complete_file_name not pushed.\n"; + print {*STDERR} "Empty file ${complete_file_name} not pushed.\n"; } } return $newrevid; @@ -1035,37 +1022,37 @@ sub mw_push_file { my $newrevid; if ($summary eq EMPTY_MESSAGE) { - $summary = ''; + $summary = EMPTY; } my $new_sha1 = $diff_info_split[3]; my $old_sha1 = $diff_info_split[2]; my $page_created = ($old_sha1 eq NULL_SHA1); my $page_deleted = ($new_sha1 eq NULL_SHA1); - $complete_file_name = mediawiki_clean_filename($complete_file_name); + $complete_file_name = clean_filename($complete_file_name); my ($title, $extension) = $complete_file_name =~ /^(.*)\.([^\.]*)$/; if (!defined($extension)) { - $extension = ""; + $extension = EMPTY; } - if ($extension eq "mw") { + if ($extension eq 'mw') { my $ns = get_mw_namespace_id_for_page($complete_file_name); - if ($ns && $ns == get_mw_namespace_id("File") && (!$export_media)) { - print STDERR "Ignoring media file related page: $complete_file_name\n"; - return ($oldrevid, "ok"); + if ($ns && $ns == get_mw_namespace_id('File') && (!$export_media)) { + print {*STDERR} "Ignoring media file related page: ${complete_file_name}\n"; + return ($oldrevid, 'ok'); } my $file_content; if ($page_deleted) { # Deleting a page usually requires - # special priviledges. A common + # special privileges. A common # convention is to replace the page # with this content instead: $file_content = DELETED_CONTENT; } else { - $file_content = run_git("cat-file blob $new_sha1"); + $file_content = run_git("cat-file blob ${new_sha1}"); } - mw_connect_maybe(); + $mediawiki = connect_maybe($mediawiki, $remotename, $url); my $result = $mediawiki->edit( { action => 'edit', @@ -1079,49 +1066,49 @@ sub mw_push_file { if (!$result) { if ($mediawiki->{error}->{code} == 3) { # edit conflicts, considered as non-fast-forward - print STDERR 'Warning: Error ' . + print {*STDERR} 'Warning: Error ' . $mediawiki->{error}->{code} . - ' from mediwiki: ' . $mediawiki->{error}->{details} . + ' from mediawiki: ' . $mediawiki->{error}->{details} . ".\n"; - return ($oldrevid, "non-fast-forward"); + return ($oldrevid, 'non-fast-forward'); } else { # Other errors. Shouldn't happen => just die() die 'Fatal: Error ' . $mediawiki->{error}->{code} . - ' from mediwiki: ' . $mediawiki->{error}->{details}; + ' from mediawiki: ' . $mediawiki->{error}->{details} . "\n"; } } $newrevid = $result->{edit}->{newrevid}; - print STDERR "Pushed file: $new_sha1 - $title\n"; + print {*STDERR} "Pushed file: ${new_sha1} - ${title}\n"; } elsif ($export_media) { $newrevid = mw_upload_file($complete_file_name, $new_sha1, $extension, $page_deleted, $summary); } else { - print STDERR "Ignoring media file $title\n"; + print {*STDERR} "Ignoring media file ${title}\n"; } $newrevid = ($newrevid or $oldrevid); - return ($newrevid, "ok"); + return ($newrevid, 'ok'); } sub mw_push { # multiple push statements can follow each other - my @refsspecs = (shift, get_more_refs("push")); + my @refsspecs = (shift, get_more_refs('push')); my $pushed; for my $refspec (@refsspecs) { my ($force, $local, $remote) = $refspec =~ /^(\+)?([^:]*):([^:]*)$/ - or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>"); + or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>\n"); if ($force) { - print STDERR "Warning: forced push not allowed on a MediaWiki.\n"; + print {*STDERR} "Warning: forced push not allowed on a MediaWiki.\n"; } - if ($local eq "") { - print STDERR "Cannot delete remote branch on a MediaWiki\n"; - print STDOUT "error $remote cannot delete\n"; + if ($local eq EMPTY) { + print {*STDERR} "Cannot delete remote branch on a MediaWiki\n"; + print {*STDOUT} "error ${remote} cannot delete\n"; next; } - if ($remote ne "refs/heads/master") { - print STDERR "Only push to the branch 'master' is supported on a MediaWiki\n"; - print STDOUT "error $remote only master allowed\n"; + if ($remote ne 'refs/heads/master') { + print {*STDERR} "Only push to the branch 'master' is supported on a MediaWiki\n"; + print {*STDOUT} "error ${remote} only master allowed\n"; next; } if (mw_push_revision($local, $remote)) { @@ -1130,30 +1117,32 @@ sub mw_push { } # Notify Git that the push is done - print STDOUT "\n"; + print {*STDOUT} "\n"; if ($pushed && $dumb_push) { - print STDERR "Just pushed some revisions to MediaWiki.\n"; - print STDERR "The pushed revisions now have to be re-imported, and your current branch\n"; - print STDERR "needs to be updated with these re-imported commits. You can do this with\n"; - print STDERR "\n"; - print STDERR " git pull --rebase\n"; - print STDERR "\n"; + print {*STDERR} "Just pushed some revisions to MediaWiki.\n"; + print {*STDERR} "The pushed revisions now have to be re-imported, and your current branch\n"; + print {*STDERR} "needs to be updated with these re-imported commits. You can do this with\n"; + print {*STDERR} "\n"; + print {*STDERR} " git pull --rebase\n"; + print {*STDERR} "\n"; } + return; } sub mw_push_revision { my $local = shift; my $remote = shift; # actually, this has to be "refs/heads/master" at this point. my $last_local_revid = get_last_local_revision(); - print STDERR ".\n"; # Finish sentence started by get_last_local_revision() + print {*STDERR} ".\n"; # Finish sentence started by get_last_local_revision() my $last_remote_revid = get_last_remote_revision(); my $mw_revision = $last_remote_revid; # Get sha1 of commit pointed by local HEAD - my $HEAD_sha1 = run_git("rev-parse $local 2>/dev/null"); chomp($HEAD_sha1); + my $HEAD_sha1 = run_git("rev-parse ${local} 2>/dev/null"); + chomp($HEAD_sha1); # Get sha1 of commit pointed by remotes/$remotename/master - my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/$remotename/master 2>/dev/null"); + my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/${remotename}/master 2>/dev/null"); chomp($remoteorigin_sha1); if ($last_local_revid > 0 && @@ -1172,22 +1161,22 @@ sub mw_push_revision { if ($last_local_revid > 0) { my $parsed_sha1 = $remoteorigin_sha1; # Find a path from last MediaWiki commit to pushed commit - print STDERR "Computing path from local to remote ...\n"; - my @local_ancestry = split(/\n/, run_git("rev-list --boundary --parents $local ^$parsed_sha1")); + print {*STDERR} "Computing path from local to remote ...\n"; + my @local_ancestry = split(/\n/, run_git("rev-list --boundary --parents ${local} ^${parsed_sha1}")); my %local_ancestry; foreach my $line (@local_ancestry) { - if (my ($child, $parents) = $line =~ m/^-?([a-f0-9]+) ([a-f0-9 ]+)/) { - foreach my $parent (split(' ', $parents)) { + if (my ($child, $parents) = $line =~ /^-?([a-f0-9]+) ([a-f0-9 ]+)/) { + foreach my $parent (split(/ /, $parents)) { $local_ancestry{$parent} = $child; } - } elsif (!$line =~ m/^([a-f0-9]+)/) { - die "Unexpected output from git rev-list: $line"; + } elsif (!$line =~ /^([a-f0-9]+)/) { + die "Unexpected output from git rev-list: ${line}\n"; } } while ($parsed_sha1 ne $HEAD_sha1) { my $child = $local_ancestry{$parsed_sha1}; if (!$child) { - printf STDERR "Cannot find a path in history from remote commit to last commit\n"; + print {*STDERR} "Cannot find a path in history from remote commit to last commit\n"; return error_non_fast_forward($remote); } push(@commit_pairs, [$parsed_sha1, $child]); @@ -1196,12 +1185,12 @@ sub mw_push_revision { } else { # No remote mediawiki revision. Export the whole # history (linearized with --first-parent) - print STDERR "Warning: no common ancestor, pushing complete history\n"; - my $history = run_git("rev-list --first-parent --children $local"); - my @history = split('\n', $history); + print {*STDERR} "Warning: no common ancestor, pushing complete history\n"; + my $history = run_git("rev-list --first-parent --children ${local}"); + my @history = split(/\n/, $history); @history = @history[1..$#history]; foreach my $line (reverse @history) { - my @commit_info_split = split(/ |\n/, $line); + my @commit_info_split = split(/[ \n]/, $line); push(@commit_pairs, \@commit_info_split); } } @@ -1209,12 +1198,12 @@ sub mw_push_revision { foreach my $commit_info_split (@commit_pairs) { my $sha1_child = @{$commit_info_split}[0]; my $sha1_commit = @{$commit_info_split}[1]; - my $diff_infos = run_git("diff-tree -r --raw -z $sha1_child $sha1_commit"); + my $diff_infos = run_git("diff-tree -r --raw -z ${sha1_child} ${sha1_commit}"); # TODO: we could detect rename, and encode them with a #redirect on the wiki. # TODO: for now, it's just a delete+add my @diff_info_list = split(/\0/, $diff_infos); # Keep the subject line of the commit message as mediawiki comment for the revision - my $commit_msg = run_git("log --no-walk --format=\"%s\" $sha1_commit"); + my $commit_msg = run_git(qq(log --no-walk --format="%s" ${sha1_commit})); chomp($commit_msg); # Push every blob while (@diff_info_list) { @@ -1226,7 +1215,7 @@ sub mw_push_revision { my $info = shift(@diff_info_list); my $file = shift(@diff_info_list); ($mw_revision, $status) = mw_push_file($info, $file, $commit_msg, $mw_revision); - if ($status eq "non-fast-forward") { + if ($status eq 'non-fast-forward') { # we may already have sent part of the # commit to MediaWiki, but it's too # late to cancel it. Stop the push in @@ -1234,22 +1223,21 @@ sub mw_push_revision { # accurate error message. return error_non_fast_forward($remote); } - if ($status ne "ok") { - die("Unknown error from mw_push_file()"); + if ($status ne 'ok') { + die("Unknown error from mw_push_file()\n"); } } - unless ($dumb_push) { - run_git("notes --ref=$remotename/mediawiki add -f -m \"mediawiki_revision: $mw_revision\" $sha1_commit"); - run_git("update-ref -m \"Git-MediaWiki push\" refs/mediawiki/$remotename/master $sha1_commit $sha1_child"); + if (!$dumb_push) { + run_git(qq(notes --ref=${remotename}/mediawiki add -f -m "mediawiki_revision: ${mw_revision}" ${sha1_commit})); } } - print STDOUT "ok $remote\n"; + print {*STDOUT} "ok ${remote}\n"; return 1; } sub get_allowed_file_extensions { - mw_connect_maybe(); + $mediawiki = connect_maybe($mediawiki, $remotename, $url); my $query = { action => 'query', @@ -1257,8 +1245,8 @@ sub get_allowed_file_extensions { siprop => 'fileextensions' }; my $result = $mediawiki->api($query); - my @file_extensions= map $_->{ext},@{$result->{query}->{fileextensions}}; - my %hashFile = map {$_ => 1}@file_extensions; + my @file_extensions = map { $_->{ext}} @{$result->{query}->{fileextensions}}; + my %hashFile = map { $_ => 1 } @file_extensions; return %hashFile; } @@ -1273,15 +1261,15 @@ my %cached_mw_namespace_id; # Return MediaWiki id for a canonical namespace name. # Ex.: "File", "Project". sub get_mw_namespace_id { - mw_connect_maybe(); + $mediawiki = connect_maybe($mediawiki, $remotename, $url); my $name = shift; if (!exists $namespace_id{$name}) { # Look at configuration file, if the record for that namespace is # already cached. Namespaces are stored in form: # "Name_of_namespace:Id_namespace", ex.: "File:6". - my @temp = split(/[\n]/, run_git("config --get-all remote." - . $remotename .".namespaceCache")); + my @temp = split(/\n/, + run_git("config --get-all remote.${remotename}.namespaceCache")); chomp(@temp); foreach my $ns (@temp) { my ($n, $id) = split(/:/, $ns); @@ -1295,7 +1283,7 @@ sub get_mw_namespace_id { } if (!exists $namespace_id{$name}) { - print STDERR "Namespace $name not found in cache, querying the wiki ...\n"; + print {*STDERR} "Namespace ${name} not found in cache, querying the wiki ...\n"; # NS not found => get namespace id from MW and store it in # configuration file. my $query = { @@ -1319,8 +1307,8 @@ sub get_mw_namespace_id { my $ns = $namespace_id{$name}; my $id; - unless (defined $ns) { - print STDERR "No such namespace $name on MediaWiki.\n"; + if (!defined $ns) { + print {*STDERR} "No such namespace ${name} on MediaWiki.\n"; $ns = {is_namespace => 0}; $namespace_id{$name} = $ns; } @@ -1332,17 +1320,17 @@ sub get_mw_namespace_id { # Store "notANameSpace" as special value for inexisting namespaces my $store_id = ($id || 'notANameSpace'); - # Store explicitely requested namespaces on disk + # Store explicitly requested namespaces on disk if (!exists $cached_mw_namespace_id{$name}) { - run_git("config --add remote.". $remotename - .".namespaceCache \"". $name .":". $store_id ."\""); + run_git(qq(config --add remote.${remotename}.namespaceCache "${name}:${store_id}")); $cached_mw_namespace_id{$name} = 1; } return $id; } sub get_mw_namespace_id_for_page { - if (my ($namespace) = $_[0] =~ /^([^:]*):/) { + my $namespace = shift; + if ($namespace =~ /^([^:]*):/) { return get_mw_namespace_id($namespace); } else { return; diff --git a/contrib/mw-to-git/git-remote-mediawiki.txt b/contrib/mw-to-git/git-remote-mediawiki.txt index 4d211f5b81..23b7ef9f62 100644 --- a/contrib/mw-to-git/git-remote-mediawiki.txt +++ b/contrib/mw-to-git/git-remote-mediawiki.txt @@ -4,4 +4,4 @@ objects from mediawiki just as one would do with a classic git repository thanks to remote-helpers. For more information, visit the wiki at -https://github.com/Bibzball/Git-Mediawiki/wiki +https://github.com/moy/Git-Mediawiki/wiki diff --git a/contrib/mw-to-git/t/README b/contrib/mw-to-git/t/README index 96e97390cf..03f6ee5d72 100644 --- a/contrib/mw-to-git/t/README +++ b/contrib/mw-to-git/t/README @@ -25,7 +25,7 @@ Principles and Technical Choices The test environment makes it easy to install and manipulate one or several MediaWiki instances. To allow developers to run the testsuite -easily, the environment does not require root priviledge (except to +easily, the environment does not require root privilege (except to install the required packages if needed). It starts a webserver instance on the user's account (using lighttpd greatly helps for that), and does not need a separate database daemon (thanks to the use @@ -81,7 +81,7 @@ parameters, please refer to the `test-gitmw-lib.sh` and ** `test_check_wiki_precond`: Check if the tests must be skipped or not. Please use this function -at the beggining of each new test file. +at the beginning of each new test file. ** `wiki_getpage`: Fetch a given page from the wiki and puts its content in the @@ -113,7 +113,7 @@ Tests if a given page exists on the wiki. ** `wiki_reset`: Reset the wiki, i.e. flush the database. Use this function at the -begining of each new test, except if the test re-uses the same wiki +beginning of each new test, except if the test re-uses the same wiki (and history) as the previous test. How to write a new test diff --git a/contrib/mw-to-git/t/install-wiki.sh b/contrib/mw-to-git/t/install-wiki.sh index c6d6fa3aef..c215213c4b 100755 --- a/contrib/mw-to-git/t/install-wiki.sh +++ b/contrib/mw-to-git/t/install-wiki.sh @@ -15,11 +15,13 @@ fi . "$WIKI_TEST_DIR"/test-gitmw-lib.sh usage () { - echo "Usage: " + echo "usage: " echo " ./install-wiki.sh <install | delete | --help>" echo " install | -i : Install a wiki on your computer." echo " delete | -d : Delete the wiki and all its pages and " echo " content." + echo " start | -s : Start the previously configured lighttpd daemon" + echo " stop : Stop lighttpd daemon." } @@ -33,6 +35,14 @@ case "$1" in wiki_delete exit 0 ;; + "start" | "-s") + start_lighttpd + exit + ;; + "stop") + stop_lighttpd + exit + ;; "--help" | "-h") usage exit 0 diff --git a/contrib/mw-to-git/t/install-wiki/LocalSettings.php b/contrib/mw-to-git/t/install-wiki/LocalSettings.php index 29f125116b..745e47e881 100644 --- a/contrib/mw-to-git/t/install-wiki/LocalSettings.php +++ b/contrib/mw-to-git/t/install-wiki/LocalSettings.php @@ -88,7 +88,7 @@ $wgShellLocale = "en_US.utf8"; ## Set $wgCacheDirectory to a writable directory on the web server ## to make your wiki go slightly faster. The directory should not -## be publically accessible from the web. +## be publicly accessible from the web. #$wgCacheDirectory = "$IP/cache"; # Site language code, should be one of the list in ./languages/Names.php diff --git a/contrib/mw-to-git/t/t9360-mw-to-git-clone.sh b/contrib/mw-to-git/t/t9360-mw-to-git-clone.sh index 811a90c9ae..22f069db48 100755 --- a/contrib/mw-to-git/t/t9360-mw-to-git-clone.sh +++ b/contrib/mw-to-git/t/t9360-mw-to-git-clone.sh @@ -191,10 +191,10 @@ test_expect_success 'Git clone works with the shallow option' ' test_path_is_file mw_dir_11/Main_Page.mw && ( cd mw_dir_11 && - test `git log --oneline Nyan.mw | wc -l` -eq 1 && - test `git log --oneline Foo.mw | wc -l` -eq 1 && - test `git log --oneline Bar.mw | wc -l` -eq 1 && - test `git log --oneline Main_Page.mw | wc -l ` -eq 1 + test $(git log --oneline Nyan.mw | wc -l) -eq 1 && + test $(git log --oneline Foo.mw | wc -l) -eq 1 && + test $(git log --oneline Bar.mw | wc -l) -eq 1 && + test $(git log --oneline Main_Page.mw | wc -l ) -eq 1 ) && wiki_check_content mw_dir_11/Nyan.mw Nyan && wiki_check_content mw_dir_11/Foo.mw Foo && @@ -218,9 +218,9 @@ test_expect_success 'Git clone works with the shallow option with a delete page' test_path_is_file mw_dir_12/Main_Page.mw && ( cd mw_dir_12 && - test `git log --oneline Nyan.mw | wc -l` -eq 1 && - test `git log --oneline Bar.mw | wc -l` -eq 1 && - test `git log --oneline Main_Page.mw | wc -l ` -eq 1 + test $(git log --oneline Nyan.mw | wc -l) -eq 1 && + test $(git log --oneline Bar.mw | wc -l) -eq 1 && + test $(git log --oneline Main_Page.mw | wc -l ) -eq 1 ) && wiki_check_content mw_dir_12/Nyan.mw Nyan && wiki_check_content mw_dir_12/Bar.mw Bar && diff --git a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh index 246d47d8fb..6b0dbdac4d 100755 --- a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh +++ b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh @@ -70,8 +70,8 @@ test_expect_success 'The shallow option works with accents' ' test_path_is_file mw_dir_4/Main_Page.mw && ( cd mw_dir_4 && - test `git log --oneline Néoà.mw | wc -l` -eq 1 && - test `git log --oneline Main_Page.mw | wc -l ` -eq 1 + test $(git log --oneline Néoà.mw | wc -l) -eq 1 && + test $(git log --oneline Main_Page.mw | wc -l ) -eq 1 ) && wiki_check_content mw_dir_4/Néoà.mw Néoà && wiki_check_content mw_dir_4/Main_Page.mw Main_Page @@ -139,7 +139,7 @@ test_expect_success 'character $ in file name (git -> mw) ' ' ' -test_expect_failure 'capital at the begining of file names' ' +test_expect_failure 'capital at the beginning of file names' ' wiki_reset && git clone mediawiki::'"$WIKI_URL"' mw_dir_10 && ( @@ -156,7 +156,7 @@ test_expect_failure 'capital at the begining of file names' ' ' -test_expect_failure 'special character at the begining of file name from mw to git' ' +test_expect_failure 'special character at the beginning of file name from mw to git' ' wiki_reset && git clone mediawiki::'"$WIKI_URL"' mw_dir_11 && wiki_editpage {char_1 "expect to be renamed {char_1" false && @@ -189,7 +189,7 @@ test_expect_success 'Push page with title containing ":" other than namespace se wiki_page_exist NotANameSpace:Page ' -test_expect_success 'test of correct formating for file name from mw to git' ' +test_expect_success 'test of correct formatting for file name from mw to git' ' wiki_reset && git clone mediawiki::'"$WIKI_URL"' mw_dir_12 && wiki_editpage char_%_7b_1 "expect to be renamed char{_1" false && @@ -207,7 +207,7 @@ test_expect_success 'test of correct formating for file name from mw to git' ' ' -test_expect_failure 'test of correct formating for file name begining with special character' ' +test_expect_failure 'test of correct formatting for file name beginning with special character' ' wiki_reset && git clone mediawiki::'"$WIKI_URL"' mw_dir_13 && ( @@ -215,7 +215,7 @@ test_expect_failure 'test of correct formating for file name begining with speci echo "my new file {char_1" >\{char_1.mw && echo "my new file [char_2" >\[char_2.mw && git add . && - git commit -am "commiting some exotic file name..." && + git commit -am "committing some exotic file name..." && git push && git pull ) && @@ -226,7 +226,7 @@ test_expect_failure 'test of correct formating for file name begining with speci ' -test_expect_success 'test of correct formating for file name from git to mw' ' +test_expect_success 'test of correct formatting for file name from git to mw' ' wiki_reset && git clone mediawiki::'"$WIKI_URL"' mw_dir_14 && ( @@ -234,7 +234,7 @@ test_expect_success 'test of correct formating for file name from git to mw' ' echo "my new file char{_1" >Char\{_1.mw && echo "my new file char[_2" >Char\[_2.mw && git add . && - git commit -m "commiting some exotic file name..." && + git commit -m "committing some exotic file name..." && git push ) && wiki_getallpage ref_page_14 && @@ -318,4 +318,30 @@ test_expect_success 'git push with \ in format control' ' ' +test_expect_success 'fast-import meta-characters in page name (mw -> git)' ' + wiki_reset && + wiki_editpage \"file\"_\\_foo "expect to be called \"file\"_\\_foo" false && + git clone mediawiki::'"$WIKI_URL"' mw_dir_21 && + test_path_is_file mw_dir_21/\"file\"_\\_foo.mw && + wiki_getallpage ref_page_21 && + test_diff_directories mw_dir_21 ref_page_21 +' + + +test_expect_success 'fast-import meta-characters in page name (git -> mw) ' ' + wiki_reset && + git clone mediawiki::'"$WIKI_URL"' mw_dir_22 && + ( + cd mw_dir_22 && + echo "this file is called \"file\"_\\_foo.mw" >\"file\"_\\_foo && + git add . && + git commit -am "file \"file\"_\\_foo" && + git pull && + git push + ) && + wiki_getallpage ref_page_22 && + test_diff_directories mw_dir_22 ref_page_22 +' + + test_done diff --git a/contrib/mw-to-git/t/t9363-mw-to-git-export-import.sh b/contrib/mw-to-git/t/t9363-mw-to-git-export-import.sh index 5a0373935f..3ff3a09567 100755 --- a/contrib/mw-to-git/t/t9363-mw-to-git-export-import.sh +++ b/contrib/mw-to-git/t/t9363-mw-to-git-export-import.sh @@ -58,6 +58,25 @@ test_expect_success 'git clone works on previously created wiki with media files test_cmp mw_dir_clone/Foo.txt mw_dir/Foo.txt ' +test_expect_success 'git push can upload media (File:) files containing valid UTF-8' ' + wiki_reset && + git clone mediawiki::'"$WIKI_URL"' mw_dir && + ( + cd mw_dir && + "$PERL_PATH" -e "print STDOUT \"UTF-8 content: éèàéê€.\";" >Bar.txt && + git add Bar.txt && + git commit -m "add a text file with UTF-8 content" && + git push + ) +' + +test_expect_success 'git clone works on previously created wiki with media files containing valid UTF-8' ' + test_when_finished "rm -rf mw_dir mw_dir_clone" && + git clone -c remote.origin.mediaimport=true \ + mediawiki::'"$WIKI_URL"' mw_dir_clone && + test_cmp mw_dir_clone/Bar.txt mw_dir/Bar.txt +' + test_expect_success 'git push & pull work with locally renamed media files' ' wiki_reset && git clone mediawiki::'"$WIKI_URL"' mw_dir && diff --git a/contrib/mw-to-git/t/t9365-continuing-queries.sh b/contrib/mw-to-git/t/t9365-continuing-queries.sh new file mode 100755 index 0000000000..016454749f --- /dev/null +++ b/contrib/mw-to-git/t/t9365-continuing-queries.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +test_description='Test the Git Mediawiki remote helper: queries w/ more than 500 results' + +. ./test-gitmw-lib.sh +. $TEST_DIRECTORY/test-lib.sh + +test_check_precond + +test_expect_success 'creating page w/ >500 revisions' ' + wiki_reset && + for i in $(test_seq 501) + do + echo "creating revision $i" && + wiki_editpage foo "revision $i<br/>" true + done +' + +test_expect_success 'cloning page w/ >500 revisions' ' + git clone mediawiki::'"$WIKI_URL"' mw_dir +' + +test_done diff --git a/contrib/mw-to-git/t/test-gitmw-lib.sh b/contrib/mw-to-git/t/test-gitmw-lib.sh index 3b2cfacf51..6546294f15 100755 --- a/contrib/mw-to-git/t/test-gitmw-lib.sh +++ b/contrib/mw-to-git/t/test-gitmw-lib.sh @@ -62,12 +62,8 @@ test_check_precond () { test_done fi - if [ ! -f "$GIT_BUILD_DIR"/git-remote-mediawiki ]; - then - echo "No remote mediawiki for git found. Copying it in git" - echo "cp $GIT_BUILD_DIR/contrib/mw-to-git/git-remote-mediawiki $GIT_BUILD_DIR/" - ln -s "$GIT_BUILD_DIR"/contrib/mw-to-git/git-remote-mediawiki "$GIT_BUILD_DIR" - fi + GIT_EXEC_PATH=$(cd "$(dirname "$0")" && cd "../.." && pwd) + PATH="$GIT_EXEC_PATH"'/bin-wrapper:'"$PATH" if [ ! -d "$WIKI_DIR_INST/$WIKI_DIR_NAME" ]; then @@ -94,8 +90,8 @@ test_diff_directories () { # # Check that <dir> contains exactly <N> files test_contains_N_files () { - if test `ls -- "$1" | wc -l` -ne "$2"; then - echo "directory $1 sould contain $2 files" + if test $(ls -- "$1" | wc -l) -ne "$2"; then + echo "directory $1 should contain $2 files" echo "it contains these files:" ls "$1" false @@ -293,7 +289,6 @@ start_lighttpd () { # Kill daemon lighttpd and removes files and folders associated. stop_lighttpd () { test -f "$WEB_TMP/pid" && kill $(cat "$WEB_TMP/pid") - rm -rf "$WEB" } # Create the SQLite database of the MediaWiki. If the database file already @@ -336,20 +331,21 @@ wiki_install () { fi # Fetch MediaWiki's archive if not already present in the TMP directory + MW_FILENAME="mediawiki-$MW_VERSION_MAJOR.$MW_VERSION_MINOR.tar.gz" cd "$TMP" - if [ ! -f "$MW_VERSION.tar.gz" ] ; then - echo "Downloading $MW_VERSION sources ..." - wget "http://download.wikimedia.org/mediawiki/1.19/mediawiki-1.19.0.tar.gz" || + if [ ! -f $MW_FILENAME ] ; then + echo "Downloading $MW_VERSION_MAJOR.$MW_VERSION_MINOR sources ..." + wget "http://download.wikimedia.org/mediawiki/$MW_VERSION_MAJOR/$MW_FILENAME" || error "Unable to download "\ - "http://download.wikimedia.org/mediawiki/1.19/"\ - "mediawiki-1.19.0.tar.gz. "\ + "http://download.wikimedia.org/mediawiki/$MW_VERSION_MAJOR/"\ + "$MW_FILENAME. "\ "Please fix your connection and launch the script again." - echo "$MW_VERSION.tar.gz downloaded in `pwd`. "\ + echo "$MW_FILENAME downloaded in $(pwd). "\ "You can delete it later if you want." else - echo "Reusing existing $MW_VERSION.tar.gz downloaded in `pwd`." + echo "Reusing existing $MW_FILENAME downloaded in $(pwd)." fi - archive_abs_path=$(pwd)/"$MW_VERSION.tar.gz" + archive_abs_path=$(pwd)/$MW_FILENAME cd "$WIKI_DIR_INST/$WIKI_DIR_NAME/" || error "can't cd to $WIKI_DIR_INST/$WIKI_DIR_NAME/" tar xzf "$archive_abs_path" --strip-components=1 || @@ -418,6 +414,7 @@ wiki_reset () { wiki_delete () { if test $LIGHTTPD = "true"; then stop_lighttpd + rm -fr "$WEB" else # Delete the wiki's directory. rm -rf "$WIKI_DIR_INST/$WIKI_DIR_NAME" || @@ -431,5 +428,5 @@ wiki_delete () { # Delete the wiki's SQLite database rm -f "$TMP/$DB_FILE" || error "Database $TMP/$DB_FILE could not be deleted." rm -f "$FILES_FOLDER/$DB_FILE" - rm -rf "$TMP/$MW_VERSION" + rm -rf "$TMP/mediawiki-$MW_VERSION_MAJOR.$MW_VERSION_MINOR.tar.gz" } diff --git a/contrib/mw-to-git/t/test.config b/contrib/mw-to-git/t/test.config index 958b37b4a7..5ba0684162 100644 --- a/contrib/mw-to-git/t/test.config +++ b/contrib/mw-to-git/t/test.config @@ -12,7 +12,7 @@ SERVER_ADDR=localhost TMP=/tmp DB_FILE=wikidb.sqlite -# If LIGHTTPD is not set to true, the script will use the defaut +# If LIGHTTPD is not set to true, the script will use the default # web server running in WIKI_DIR_INST. WIKI_DIR_INST=/var/www @@ -30,6 +30,8 @@ WEB_WWW=$WEB/www # The variables below are used by the script to install a wiki. # You should not modify these unless you are modifying the script itself. -MW_VERSION=mediawiki-1.19.0 +# tested versions: 1.19.X -> 1.21.1 +MW_VERSION_MAJOR=1.21 +MW_VERSION_MINOR=1 FILES_FOLDER=install-wiki DB_INSTALL_SCRIPT=db_install.php |