summaryrefslogtreecommitdiff
path: root/contrib/mw-to-git
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/mw-to-git')
-rw-r--r--contrib/mw-to-git/.gitignore1
-rw-r--r--contrib/mw-to-git/Git/Mediawiki.pm78
-rw-r--r--contrib/mw-to-git/Makefile23
-rwxr-xr-xcontrib/mw-to-git/bin-wrapper/git14
-rwxr-xr-xcontrib/mw-to-git/git-mw.perl368
-rwxr-xr-xcontrib/mw-to-git/git-remote-mediawiki.perl109
-rwxr-xr-xcontrib/mw-to-git/t/t9365-continuing-queries.sh23
-rwxr-xr-xcontrib/mw-to-git/t/test-gitmw-lib.sh10
-rw-r--r--contrib/mw-to-git/t/test.config2
9 files changed, 537 insertions, 91 deletions
diff --git a/contrib/mw-to-git/.gitignore b/contrib/mw-to-git/.gitignore
index b9196555e5..ae545b013d 100644
--- a/contrib/mw-to-git/.gitignore
+++ b/contrib/mw-to-git/.gitignore
@@ -1 +1,2 @@
git-remote-mediawiki
+git-mw
diff --git a/contrib/mw-to-git/Git/Mediawiki.pm b/contrib/mw-to-git/Git/Mediawiki.pm
index 805f42a49e..d13c4dfa7d 100644
--- a/contrib/mw-to-git/Git/Mediawiki.pm
+++ b/contrib/mw-to-git/Git/Mediawiki.pm
@@ -18,7 +18,83 @@ require Exporter;
@EXPORT = ();
# Methods which can be called as standalone functions as well:
-@EXPORT_OK = ();
+@EXPORT_OK = qw(clean_filename smudge_filename connect_maybe
+ EMPTY HTTP_CODE_OK HTTP_CODE_PAGE_NOT_FOUND);
+}
+
+# Mediawiki filenames can contain forward slashes. This variable decides by which pattern they should be replaced
+use constant SLASH_REPLACEMENT => '%2F';
+
+# Used to test for empty strings
+use constant EMPTY => q{};
+
+# HTTP codes
+use constant HTTP_CODE_OK => 200;
+use constant HTTP_CODE_PAGE_NOT_FOUND => 404;
+
+sub clean_filename {
+ my $filename = shift;
+ $filename =~ s{@{[SLASH_REPLACEMENT]}}{/}g;
+ # [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
+ # Do a variant of URL-encoding, i.e. looks like URL-encoding,
+ # but with _ added to prevent MediaWiki from thinking this is
+ # an actual special character.
+ $filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge;
+ # If we use the uri escape before
+ # we should unescape here, before anything
+
+ return $filename;
+}
+
+sub smudge_filename {
+ my $filename = shift;
+ $filename =~ s{/}{@{[SLASH_REPLACEMENT]}}g;
+ $filename =~ s/ /_/g;
+ # Decode forbidden characters encoded in clean_filename
+ $filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf('%c', hex($1))/ge;
+ return $filename;
+}
+
+sub connect_maybe {
+ my $wiki = shift;
+ if ($wiki) {
+ return $wiki;
+ }
+
+ my $remote_name = shift;
+ my $remote_url = shift;
+ my ($wiki_login, $wiki_password, $wiki_domain);
+
+ $wiki_login = Git::config("remote.${remote_name}.mwLogin");
+ $wiki_password = Git::config("remote.${remote_name}.mwPassword");
+ $wiki_domain = Git::config("remote.${remote_name}.mwDomain");
+
+ $wiki = MediaWiki::API->new;
+ $wiki->{config}->{api_url} = "${remote_url}/api.php";
+ if ($wiki_login) {
+ my %credential = (
+ 'url' => $remote_url,
+ 'username' => $wiki_login,
+ 'password' => $wiki_password
+ );
+ Git::credential(\%credential);
+ my $request = {lgname => $credential{username},
+ lgpassword => $credential{password},
+ lgdomain => $wiki_domain};
+ if ($wiki->login($request)) {
+ Git::credential(\%credential, 'approve');
+ print {*STDERR} qq(Logged in mediawiki user "$credential{username}".\n);
+ } else {
+ print {*STDERR} qq(Failed to log in mediawiki user "$credential{username}" on ${remote_url}\n);
+ print {*STDERR} ' (error ' .
+ $wiki->{error}->{code} . ': ' .
+ $wiki->{error}->{details} . ")\n";
+ Git::credential(\%credential, 'reject');
+ exit 1;
+ }
+ }
+
+ return $wiki;
}
1; # Famous last words
diff --git a/contrib/mw-to-git/Makefile b/contrib/mw-to-git/Makefile
index 37b68e2571..a4b6f7a2cd 100644
--- a/contrib/mw-to-git/Makefile
+++ b/contrib/mw-to-git/Makefile
@@ -2,12 +2,19 @@
# Copyright (C) 2013
# Matthieu Moy <Matthieu.Moy@imag.fr>
#
+# To build and test:
+#
+# make
+# bin-wrapper/git mw preview Some_page.mw
+# bin-wrapper/git clone mediawiki::http://example.com/wiki/
+#
# To install, run Git's toplevel 'make install' then run:
#
# make install
GIT_MEDIAWIKI_PM=Git/Mediawiki.pm
SCRIPT_PERL=git-remote-mediawiki.perl
+SCRIPT_PERL+=git-mw.perl
GIT_ROOT_DIR=../..
HERE=contrib/mw-to-git/
@@ -21,22 +28,30 @@ INSTLIBDIR_SQ = $(subst ','\'',$(INSTLIBDIR))
all: build
+test: all
+ $(MAKE) -C t
+
+check: perlcritic test
+
install_pm:
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(INSTLIBDIR_SQ)/Git'
$(INSTALL) -m 644 $(GIT_MEDIAWIKI_PM) \
'$(DESTDIR_SQ)$(INSTLIBDIR_SQ)/$(GIT_MEDIAWIKI_PM)'
build:
- $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL=$(SCRIPT_PERL_FULL) \
+ $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL="$(SCRIPT_PERL_FULL)" \
build-perl-script
install: install_pm
- $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL=$(SCRIPT_PERL_FULL) \
+ $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL="$(SCRIPT_PERL_FULL)" \
install-perl-script
clean:
- $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL=$(SCRIPT_PERL_FULL) \
+ $(MAKE) -C $(GIT_ROOT_DIR) SCRIPT_PERL="$(SCRIPT_PERL_FULL)" \
clean-perl-script
perlcritic:
- perlcritic -2 *.perl
+ perlcritic -5 $(SCRIPT_PERL)
+ -perlcritic -2 $(SCRIPT_PERL)
+
+.PHONY: all test check install_pm install clean perlcritic
diff --git a/contrib/mw-to-git/bin-wrapper/git b/contrib/mw-to-git/bin-wrapper/git
new file mode 100755
index 0000000000..6663ae57e8
--- /dev/null
+++ b/contrib/mw-to-git/bin-wrapper/git
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+# git executable wrapper script for Git-Mediawiki to run tests without
+# installing all the scripts and perl packages.
+
+GIT_ROOT_DIR=../../..
+GIT_EXEC_PATH=$(cd "$(dirname "$0")" && cd ${GIT_ROOT_DIR} && pwd)
+
+GITPERLLIB="$GIT_EXEC_PATH"'/contrib/mw-to-git'"${GITPERLLIB:+:$GITPERLLIB}"
+PATH="$GIT_EXEC_PATH"'/contrib/mw-to-git:'"$PATH"
+
+export GITPERLLIB PATH
+
+exec "${GIT_EXEC_PATH}/bin-wrappers/git" "$@"
diff --git a/contrib/mw-to-git/git-mw.perl b/contrib/mw-to-git/git-mw.perl
new file mode 100755
index 0000000000..28df3ee321
--- /dev/null
+++ b/contrib/mw-to-git/git-mw.perl
@@ -0,0 +1,368 @@
+#!/usr/bin/perl
+
+# Copyright (C) 2013
+# Benoit Person <benoit.person@ensimag.imag.fr>
+# Celestin Matte <celestin.matte@ensimag.imag.fr>
+# License: GPL v2 or later
+
+# Set of tools for git repo with a mediawiki remote.
+# Documentation & bugtracker: https://github.com/moy/Git-Mediawiki/
+
+use strict;
+use warnings;
+
+use Getopt::Long;
+use URI::URL qw(url);
+use LWP::UserAgent;
+use HTML::TreeBuilder;
+
+use Git;
+use MediaWiki::API;
+use Git::Mediawiki qw(clean_filename connect_maybe
+ EMPTY HTTP_CODE_PAGE_NOT_FOUND);
+
+# By default, use UTF-8 to communicate with Git and the user
+binmode STDERR, ':encoding(UTF-8)';
+binmode STDOUT, ':encoding(UTF-8)';
+
+# Global parameters
+my $verbose = 0;
+sub v_print {
+ if ($verbose) {
+ return print {*STDERR} @_;
+ }
+ return;
+}
+
+# Preview parameters
+my $file_name = EMPTY;
+my $remote_name = EMPTY;
+my $preview_file_name = EMPTY;
+my $autoload = 0;
+sub file {
+ $file_name = shift;
+ return $file_name;
+}
+
+my %commands = (
+ 'help' =>
+ [\&help, {}, \&help],
+ 'preview' =>
+ [\&preview, {
+ '<>' => \&file,
+ 'output|o=s' => \$preview_file_name,
+ 'remote|r=s' => \$remote_name,
+ 'autoload|a' => \$autoload
+ }, \&preview_help]
+);
+
+# Search for sub-command
+my $cmd = $commands{'help'};
+for (0..@ARGV-1) {
+ if (defined $commands{$ARGV[$_]}) {
+ $cmd = $commands{$ARGV[$_]};
+ splice @ARGV, $_, 1;
+ last;
+ }
+};
+GetOptions( %{$cmd->[1]},
+ 'help|h' => \&{$cmd->[2]},
+ 'verbose|v' => \$verbose);
+
+# Launch command
+&{$cmd->[0]};
+
+############################# Preview Functions ################################
+
+sub preview_help {
+ print {*STDOUT} <<'END';
+USAGE: git mw preview [--remote|-r <remote name>] [--autoload|-a]
+ [--output|-o <output filename>] [--verbose|-v]
+ <blob> | <filename>
+
+DESCRIPTION:
+Preview is an utiliy to preview local content of a mediawiki repo as if it was
+pushed on the remote.
+
+For that, preview searches for the remote name of the current branch's
+upstream if --remote is not set. If that remote is not found or if it
+is not a mediawiki, it lists all mediawiki remotes configured and asks
+you to replay your command with the --remote option set properly.
+
+Then, it searches for a file named 'filename'. If it's not found in
+the current dir, it will assume it's a blob.
+
+The content retrieved in the file (or in the blob) will then be parsed
+by the remote mediawiki and combined with a template retrieved from
+the mediawiki.
+
+Finally, preview will save the HTML result in a file. and autoload it
+in your default web browser if the option --autoload is present.
+
+OPTIONS:
+ -r <remote name>, --remote <remote name>
+ If the remote is a mediawiki, the template and the parse engine
+ used for the preview will be those of that remote.
+ If not, a list of valid remotes will be shown.
+
+ -a, --autoload
+ Try to load the HTML output in a new tab (or new window) of your
+ default web browser.
+
+ -o <output filename>, --output <output filename>
+ Change the HTML output filename. Default filename is based on the
+ input filename with its extension replaced by '.html'.
+
+ -v, --verbose
+ Show more information on what's going on under the hood.
+END
+ exit;
+}
+
+sub preview {
+ my $wiki;
+ my ($remote_url, $wiki_page_name);
+ my ($new_content, $template);
+ my $file_content;
+
+ if ($file_name eq EMPTY) {
+ die "Missing file argument, see `git mw help`\n";
+ }
+
+ v_print("### Selecting remote\n");
+ if ($remote_name eq EMPTY) {
+ $remote_name = find_upstream_remote_name();
+ if ($remote_name) {
+ $remote_url = mediawiki_remote_url_maybe($remote_name);
+ }
+
+ if (! $remote_url) {
+ my @valid_remotes = find_mediawiki_remotes();
+
+ if ($#valid_remotes == 0) {
+ print {*STDERR} "No mediawiki remote in this repo. \n";
+ exit 1;
+ } else {
+ my $remotes_list = join("\n\t", @valid_remotes);
+ print {*STDERR} <<"MESSAGE";
+There are multiple mediawiki remotes, which of:
+ ${remotes_list}
+do you want ? Use the -r option to specify the remote.
+MESSAGE
+ }
+
+ exit 1;
+ }
+ } else {
+ if (!is_valid_remote($remote_name)) {
+ die "${remote_name} is not a remote\n";
+ }
+
+ $remote_url = mediawiki_remote_url_maybe($remote_name);
+ if (! $remote_url) {
+ die "${remote_name} is not a mediawiki remote\n";
+ }
+ }
+ v_print("selected remote:\n\tname: ${remote_name}\n\turl: ${remote_url}\n");
+
+ $wiki = connect_maybe($wiki, $remote_name, $remote_url);
+
+ # Read file content
+ if (! -e $file_name) {
+ $file_content = git_cmd_try {
+ Git::command('cat-file', 'blob', $file_name); }
+ "%s failed w/ code %d";
+
+ if ($file_name =~ /(.+):(.+)/) {
+ $file_name = $2;
+ }
+ } else {
+ open my $read_fh, "<", $file_name
+ or die "could not open ${file_name}: $!\n";
+ $file_content = do { local $/ = undef; <$read_fh> };
+ close $read_fh
+ or die "unable to close: $!\n";
+ }
+
+ v_print("### Retrieving template\n");
+ ($wiki_page_name = clean_filename($file_name)) =~ s/\.[^.]+$//;
+ $template = get_template($remote_url, $wiki_page_name);
+
+ v_print("### Parsing local content\n");
+ $new_content = $wiki->api({
+ action => 'parse',
+ text => $file_content,
+ title => $wiki_page_name
+ }, {
+ skip_encoding => 1
+ }) or die "No response from remote mediawiki\n";
+ $new_content = $new_content->{'parse'}->{'text'}->{'*'};
+
+ v_print("### Merging contents\n");
+ if ($preview_file_name eq EMPTY) {
+ ($preview_file_name = $file_name) =~ s/\.[^.]+$/.html/;
+ }
+ open(my $save_fh, '>:encoding(UTF-8)', $preview_file_name)
+ or die "Could not open: $!\n";
+ print {$save_fh} merge_contents($template, $new_content, $remote_url);
+ close($save_fh)
+ or die "Could not close: $!\n";
+
+ v_print("### Results\n");
+ if ($autoload) {
+ v_print("Launching browser w/ file: ${preview_file_name}");
+ system('git', 'web--browse', $preview_file_name);
+ } else {
+ print {*STDERR} "Preview file saved as: ${preview_file_name}\n";
+ }
+
+ exit;
+}
+
+# uses global scope variable: $remote_name
+sub merge_contents {
+ my $template = shift;
+ my $content = shift;
+ my $remote_url = shift;
+ my ($content_tree, $html_tree, $mw_content_text);
+ my $template_content_id = 'bodyContent';
+
+ $html_tree = HTML::TreeBuilder->new;
+ $html_tree->parse($template);
+
+ $content_tree = HTML::TreeBuilder->new;
+ $content_tree->parse($content);
+
+ $template_content_id = Git::config("remote.${remote_name}.mwIDcontent")
+ || $template_content_id;
+ v_print("Using '${template_content_id}' as the content ID\n");
+
+ $mw_content_text = $html_tree->look_down('id', $template_content_id);
+ if (!defined $mw_content_text) {
+ print {*STDERR} <<"CONFIG";
+Could not combine the new content with the template. You might want to
+configure `mediawiki.IDContent` in your config:
+ git config --add remote.${remote_name}.mwIDcontent <id>
+and re-run the command afterward.
+CONFIG
+ exit 1;
+ }
+ $mw_content_text->delete_content();
+ $mw_content_text->push_content($content_tree);
+
+ make_links_absolute($html_tree, $remote_url);
+
+ return $html_tree->as_HTML;
+}
+
+sub make_links_absolute {
+ my $html_tree = shift;
+ my $remote_url = shift;
+ for (@{ $html_tree->extract_links() }) {
+ my ($link, $element, $attr) = @{ $_ };
+ my $url = url($link)->canonical;
+ if ($url !~ /#/) {
+ $element->attr($attr, URI->new_abs($url, $remote_url));
+ }
+ }
+ return $html_tree;
+}
+
+sub is_valid_remote {
+ my $remote = shift;
+ my @remotes = git_cmd_try {
+ Git::command('remote') }
+ "%s failed w/ code %d";
+ my $found_remote = 0;
+ foreach my $remote (@remotes) {
+ if ($remote eq $remote) {
+ $found_remote = 1;
+ last;
+ }
+ }
+ return $found_remote;
+}
+
+sub find_mediawiki_remotes {
+ my @remotes = git_cmd_try {
+ Git::command('remote'); }
+ "%s failed w/ code %d";
+ my $remote_url;
+ my @valid_remotes = ();
+ foreach my $remote (@remotes) {
+ $remote_url = mediawiki_remote_url_maybe($remote);
+ if ($remote_url) {
+ push(@valid_remotes, $remote);
+ }
+ }
+ return @valid_remotes;
+}
+
+sub find_upstream_remote_name {
+ my $current_branch = git_cmd_try {
+ Git::command_oneline('symbolic-ref', '--short', 'HEAD') }
+ "%s failed w/ code %d";
+ return Git::config("branch.${current_branch}.remote");
+}
+
+sub mediawiki_remote_url_maybe {
+ my $remote = shift;
+
+ # Find remote url
+ my $remote_url = Git::config("remote.${remote}.url");
+ if ($remote_url =~ s/mediawiki::(.*)/$1/) {
+ return url($remote_url)->canonical;
+ }
+
+ return;
+}
+
+sub get_template {
+ my $url = shift;
+ my $page_name = shift;
+ my ($req, $res, $code, $url_after);
+
+ $req = LWP::UserAgent->new;
+ if ($verbose) {
+ $req->show_progress(1);
+ }
+
+ $res = $req->get("${url}/index.php?title=${page_name}");
+ if (!$res->is_success) {
+ $code = $res->code;
+ $url_after = $res->request()->uri(); # resolve all redirections
+ if ($code == HTTP_CODE_PAGE_NOT_FOUND) {
+ if ($verbose) {
+ print {*STDERR} <<"WARNING";
+Warning: Failed to retrieve '$page_name'. Create it on the mediawiki if you want
+all the links to work properly.
+Trying to use the mediawiki homepage as a fallback template ...
+WARNING
+ }
+
+ # LWP automatically redirects GET request
+ $res = $req->get("${url}/index.php");
+ if (!$res->is_success) {
+ $url_after = $res->request()->uri(); # resolve all redirections
+ die "Failed to get homepage @ ${url_after} w/ code ${code}\n";
+ }
+ } else {
+ die "Failed to get '${page_name}' @ ${url_after} w/ code ${code}\n";
+ }
+ }
+
+ return $res->decoded_content;
+}
+
+############################## Help Functions ##################################
+
+sub help {
+ print {*STDOUT} <<'END';
+usage: git mw <command> <args>
+
+git mw commands are:
+ help Display help information about git mw
+ preview Parse and render local file into HTML
+END
+ exit;
+}
diff --git a/contrib/mw-to-git/git-remote-mediawiki.perl b/contrib/mw-to-git/git-remote-mediawiki.perl
index 71baf8ace8..3f8d993afa 100755
--- a/contrib/mw-to-git/git-remote-mediawiki.perl
+++ b/contrib/mw-to-git/git-remote-mediawiki.perl
@@ -14,6 +14,8 @@
use strict;
use MediaWiki::API;
use Git;
+use Git::Mediawiki qw(clean_filename smudge_filename connect_maybe
+ EMPTY HTTP_CODE_OK);
use DateTime::Format::ISO8601;
use warnings;
@@ -23,9 +25,6 @@ binmode STDOUT, ':encoding(UTF-8)';
use URI::Escape;
-# Mediawiki filenames can contain forward slashes. This variable decides by which pattern they should be replaced
-use constant SLASH_REPLACEMENT => '%2F';
-
# It's not always possible to delete pages (may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";
@@ -40,8 +39,6 @@ use constant NULL_SHA1 => '0000000000000000000000000000000000000000';
# Used on Git's side to reflect empty edit messages on the wiki
use constant EMPTY_MESSAGE => '*Empty MediaWiki Message*';
-use constant EMPTY => q{};
-
# Number of pages taken into account at once in submodule get_mw_page_list
use constant SLICE_SIZE => 50;
@@ -50,8 +47,6 @@ use constant SLICE_SIZE => 50;
# the number of links to be returned (500 links max).
use constant BATCH_SIZE => 10;
-use constant HTTP_CODE_OK => 200;
-
if (@ARGV != 2) {
exit_error_usage();
}
@@ -199,37 +194,6 @@ sub parse_command {
# MediaWiki API instance, created lazily.
my $mediawiki;
-sub mw_connect_maybe {
- if ($mediawiki) {
- return;
- }
- $mediawiki = MediaWiki::API->new;
- $mediawiki->{config}->{api_url} = "${url}/api.php";
- if ($wiki_login) {
- my %credential = (
- 'url' => $url,
- 'username' => $wiki_login,
- 'password' => $wiki_passwd
- );
- Git::credential(\%credential);
- my $request = {lgname => $credential{username},
- lgpassword => $credential{password},
- lgdomain => $wiki_domain};
- if ($mediawiki->login($request)) {
- Git::credential(\%credential, 'approve');
- print {*STDERR} qq(Logged in mediawiki user "$credential{username}".\n);
- } else {
- print {*STDERR} qq(Failed to log in mediawiki user "$credential{username}" on ${url}\n);
- print {*STDERR} ' (error ' .
- $mediawiki->{error}->{code} . ': ' .
- $mediawiki->{error}->{details} . ")\n";
- Git::credential(\%credential, 'reject');
- exit 1;
- }
- }
- return;
-}
-
sub fatal_mw_error {
my $action = shift;
print STDERR "fatal: could not $action.\n";
@@ -339,7 +303,7 @@ sub get_mw_first_pages {
# Get the list of pages to be fetched according to configuration.
sub get_mw_pages {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
print {*STDERR} "Listing pages on remote wiki...\n";
@@ -529,7 +493,7 @@ sub get_last_local_revision {
# avoid a loop onto all tracked pages. This is useful for the fetch-by-rev
# option.
sub get_last_global_remote_rev {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $query = {
action => 'query',
@@ -545,7 +509,7 @@ sub get_last_global_remote_rev {
# Get the last remote revision concerning the tracked pages and the tracked
# categories.
sub get_last_remote_revision {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my %pages_hash = get_mw_pages();
my @pages = values(%pages_hash);
@@ -601,29 +565,6 @@ sub mediawiki_smudge {
return "${string}\n";
}
-sub mediawiki_clean_filename {
- my $filename = shift;
- $filename =~ s{@{[SLASH_REPLACEMENT]}}{/}g;
- # [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
- # Do a variant of URL-encoding, i.e. looks like URL-encoding,
- # but with _ added to prevent MediaWiki from thinking this is
- # an actual special character.
- $filename =~ s/[\[\]\{\}\|]/sprintf("_%%_%x", ord($&))/ge;
- # If we use the uri escape before
- # we should unescape here, before anything
-
- return $filename;
-}
-
-sub mediawiki_smudge_filename {
- my $filename = shift;
- $filename =~ s{/}{@{[SLASH_REPLACEMENT]}}g;
- $filename =~ s/ /_/g;
- # Decode forbidden characters encoded in mediawiki_clean_filename
- $filename =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf('%c', hex($1))/ge;
- return $filename;
-}
-
sub literal_data {
my ($content) = @_;
print {*STDOUT} 'data ', bytes::length($content), "\n", $content;
@@ -635,9 +576,9 @@ sub literal_data_raw {
my ($content) = @_;
# Avoid confusion between size in bytes and in characters
utf8::downgrade($content);
- binmode {*STDOUT}, ':raw';
+ binmode STDOUT, ':raw';
print {*STDOUT} 'data ', bytes::length($content), "\n", $content;
- binmode {*STDOUT}, ':encoding(UTF-8)';
+ binmode STDOUT, ':encoding(UTF-8)';
return;
}
@@ -649,6 +590,9 @@ sub mw_capabilities {
print {*STDOUT} "import\n";
print {*STDOUT} "list\n";
print {*STDOUT} "push\n";
+ if ($dumb_push) {
+ print {*STDOUT} "no-private-update\n";
+ }
print {*STDOUT} "\n";
return;
}
@@ -681,6 +625,9 @@ sub fetch_mw_revisions_for_page {
rvstartid => $fetch_from,
rvlimit => 500,
pageids => $id,
+
+ # Let MediaWiki know that we support the latest API.
+ continue => '',
};
my $revnum = 0;
@@ -696,8 +643,15 @@ sub fetch_mw_revisions_for_page {
push(@page_revs, $page_rev_ids);
$revnum++;
}
- last if (!$result->{'query-continue'});
- $query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid};
+
+ if ($result->{'query-continue'}) { # For legacy APIs
+ $query->{rvstartid} = $result->{'query-continue'}->{revisions}->{rvstartid};
+ } elsif ($result->{continue}) { # For newer APIs
+ $query->{rvstartid} = $result->{continue}->{rvcontinue};
+ $query->{continue} = $result->{continue}->{continue};
+ } else {
+ last;
+ }
}
if ($shallow_import && @page_revs) {
print {*STDERR} " Found 1 revision (shallow import).\n";
@@ -831,7 +785,7 @@ sub mw_import_ref {
return;
}
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
print {*STDERR} "Searching revisions...\n";
my $last_local = get_last_local_revision();
@@ -945,7 +899,7 @@ sub mw_import_revids {
my %commit;
$commit{author} = $rev->{user} || 'Anonymous';
$commit{comment} = $rev->{comment} || EMPTY_MESSAGE;
- $commit{title} = mediawiki_smudge_filename($page_title);
+ $commit{title} = smudge_filename($page_title);
$commit{mw_revision} = $rev->{revid};
$commit{content} = mediawiki_smudge($rev->{'*'});
@@ -1006,7 +960,7 @@ sub mw_upload_file {
}
# Deleting and uploading a file requires a priviledged user
if ($file_deleted) {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $query = {
action => 'delete',
title => $path,
@@ -1022,7 +976,7 @@ sub mw_upload_file {
# Don't let perl try to interpret file content as UTF-8 => use "raw"
my $content = run_git("cat-file blob ${new_sha1}", 'raw');
if ($content ne EMPTY) {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
$mediawiki->{config}->{upload_url} =
"${url}/index.php/Special:Upload";
$mediawiki->edit({
@@ -1070,7 +1024,7 @@ sub mw_push_file {
my $old_sha1 = $diff_info_split[2];
my $page_created = ($old_sha1 eq NULL_SHA1);
my $page_deleted = ($new_sha1 eq NULL_SHA1);
- $complete_file_name = mediawiki_clean_filename($complete_file_name);
+ $complete_file_name = clean_filename($complete_file_name);
my ($title, $extension) = $complete_file_name =~ /^(.*)\.([^\.]*)$/;
if (!defined($extension)) {
@@ -1093,7 +1047,7 @@ sub mw_push_file {
$file_content = run_git("cat-file blob ${new_sha1}");
}
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $result = $mediawiki->edit( {
action => 'edit',
@@ -1270,7 +1224,6 @@ sub mw_push_revision {
}
if (!$dumb_push) {
run_git(qq(notes --ref=${remotename}/mediawiki add -f -m "mediawiki_revision: ${mw_revision}" ${sha1_commit}));
- run_git(qq(update-ref -m "Git-MediaWiki push" refs/mediawiki/${remotename}/master ${sha1_commit} ${sha1_child}));
}
}
@@ -1279,7 +1232,7 @@ sub mw_push_revision {
}
sub get_allowed_file_extensions {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $query = {
action => 'query',
@@ -1303,7 +1256,7 @@ my %cached_mw_namespace_id;
# Return MediaWiki id for a canonical namespace name.
# Ex.: "File", "Project".
sub get_mw_namespace_id {
- mw_connect_maybe();
+ $mediawiki = connect_maybe($mediawiki, $remotename, $url);
my $name = shift;
if (!exists $namespace_id{$name}) {
@@ -1362,7 +1315,7 @@ sub get_mw_namespace_id {
# Store "notANameSpace" as special value for inexisting namespaces
my $store_id = ($id || 'notANameSpace');
- # Store explicitely requested namespaces on disk
+ # Store explicitly requested namespaces on disk
if (!exists $cached_mw_namespace_id{$name}) {
run_git(qq(config --add remote.${remotename}.namespaceCache "${name}:${store_id}"));
$cached_mw_namespace_id{$name} = 1;
diff --git a/contrib/mw-to-git/t/t9365-continuing-queries.sh b/contrib/mw-to-git/t/t9365-continuing-queries.sh
new file mode 100755
index 0000000000..27e267f532
--- /dev/null
+++ b/contrib/mw-to-git/t/t9365-continuing-queries.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+test_description='Test the Git Mediawiki remote helper: queries w/ more than 500 results'
+
+. ./test-gitmw-lib.sh
+. $TEST_DIRECTORY/test-lib.sh
+
+test_check_precond
+
+test_expect_success 'creating page w/ >500 revisions' '
+ wiki_reset &&
+ for i in `test_seq 501`
+ do
+ echo "creating revision $i" &&
+ wiki_editpage foo "revision $i<br/>" true
+ done
+'
+
+test_expect_success 'cloning page w/ >500 revisions' '
+ git clone mediawiki::'"$WIKI_URL"' mw_dir
+'
+
+test_done
diff --git a/contrib/mw-to-git/t/test-gitmw-lib.sh b/contrib/mw-to-git/t/test-gitmw-lib.sh
index bb76cee379..3372b2af34 100755
--- a/contrib/mw-to-git/t/test-gitmw-lib.sh
+++ b/contrib/mw-to-git/t/test-gitmw-lib.sh
@@ -62,12 +62,8 @@ test_check_precond () {
test_done
fi
- if [ ! -f "$GIT_BUILD_DIR"/git-remote-mediawiki ];
- then
- echo "No remote mediawiki for git found. Copying it in git"
- echo "cp $GIT_BUILD_DIR/contrib/mw-to-git/git-remote-mediawiki $GIT_BUILD_DIR/"
- ln -s "$GIT_BUILD_DIR"/contrib/mw-to-git/git-remote-mediawiki "$GIT_BUILD_DIR"
- fi
+ GIT_EXEC_PATH=$(cd "$(dirname "$0")" && cd "../.." && pwd)
+ PATH="$GIT_EXEC_PATH"'/bin-wrapper:'"$PATH"
if [ ! -d "$WIKI_DIR_INST/$WIKI_DIR_NAME" ];
then
@@ -95,7 +91,7 @@ test_diff_directories () {
# Check that <dir> contains exactly <N> files
test_contains_N_files () {
if test `ls -- "$1" | wc -l` -ne "$2"; then
- echo "directory $1 sould contain $2 files"
+ echo "directory $1 should contain $2 files"
echo "it contains these files:"
ls "$1"
false
diff --git a/contrib/mw-to-git/t/test.config b/contrib/mw-to-git/t/test.config
index 4cfebe9c69..5ba0684162 100644
--- a/contrib/mw-to-git/t/test.config
+++ b/contrib/mw-to-git/t/test.config
@@ -12,7 +12,7 @@ SERVER_ADDR=localhost
TMP=/tmp
DB_FILE=wikidb.sqlite
-# If LIGHTTPD is not set to true, the script will use the defaut
+# If LIGHTTPD is not set to true, the script will use the default
# web server running in WIKI_DIR_INST.
WIKI_DIR_INST=/var/www