#! /usr/bin/perl
# Copyright (C) 2011
# Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr>
# Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr>
# Claire Fousse <claire.fousse@ensimag.imag.fr>
# David Amouyal <david.amouyal@ensimag.imag.fr>
# Matthieu Moy <matthieu.moy@grenoble-inp.fr>
# License: GPL v2 or later
# Gateway between Git and MediaWiki.
# Documentation & bugtracker: https://github.com/moy/Git-Mediawiki/
use strict;
use MediaWiki::API;
use Git;
use Git::Mediawiki qw(clean_filename smudge_filename connect_maybe
EMPTY HTTP_CODE_OK);
use DateTime::Format::ISO8601;
use warnings;
# By default, use UTF-8 to communicate with Git and the user
binmode STDERR, ':encoding(UTF-8)';
binmode STDOUT, ':encoding(UTF-8)';
use URI::Escape;
# It's not always possible to delete pages (may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";
# It's not possible to create empty pages. New empty files in Git are
# sent with this content instead.
use constant EMPTY_CONTENT => "<!-- empty page -->\n";
# used to reflect file creation or deletion in diff.
use constant NULL_SHA1 => '0000000000000000000000000000000000000000';
# Used on Git's side to reflect empty edit messages on the wiki
use constant EMPTY_MESSAGE => '*Empty MediaWiki Message*';
# Number of pages taken into account at once in submodule get_mw_page_list
use constant SLICE_SIZE => 50;
# Number of linked mediafile to get at once in get_linked_mediafiles
# The query is split in small batches because of the MW API limit of
# the number of links to be returned (500 links max).
use constant BATCH_SIZE => 10;
if (@ARGV != 2) {
exit_error_usage();
}
my $remotename = $ARGV[0];
my $url = $ARGV[1];
# Accept both space-separated and multiple keys in config file.
# Spaces should be written as _ anyway because we'll use chomp.
my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote.${remotename}.pages"));
chomp(@tracked_pages);
# Just like @tracked_pages, but for MediaWiki categories.
my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote.${remotename}.categories"));
chomp(@tracked_categories);
# Import media files on pull
my $import_media = run_git("config --get --bool remote.${remotename}.mediaimport");
chomp($import_media);
$import_media = ($import_media eq 'true');
# Export media files on push
my $export_media = run_git("config --get --bool remote.${remotename}.mediaexport");
chomp($export_media);
$export_media = !($export_media eq 'false');
my $wiki_login = run_git("config --get remote.${remotename}.mwLogin");
# Note: mwPassword is discourraged. Use the credential system instead.
my $wiki_passwd = run_git("config --get remote.${remotename}.mwPassword");
my $wiki_domain = run_git("config --get remote.${remotename}.mwDomain");
chomp($wiki_login);
chomp($wiki_passwd);
chomp($wiki_domain);
# Import only last revisions (both for clone and fetch)
my $shallow_import = run_git("config --get --bool remote.${remotename}.shallow");
chomp($shallow_import);
$shallow_import = ($shallow_import eq 'true');
# Fetch (clone and pull) by revisions instead of by pages. This behavior
# is more efficient when we have a wiki with lots of pages and we fetch
# the revisions quite often so that they concern only few pages.
# Possible values:
# - by_rev: perform one query per new revision on the remote wiki
# - by_page: query each tracked page for new revision
my $fetch_strategy = run_git("config --get remote.${remotename}.fetchStrategy");
if (!$fetch_strategy) {
$fetch_strategy = run_git('config --get mediawiki.fetchStrategy');
}
chomp($fetch_strategy);
if (!$fetch_strategy) {
$fetch_strategy = 'by_page';
}
# Remember the timestamp corresponding to a revision id.
my %basetimestamps;
# Dumb push: don't update notes and mediawiki ref to reflect the last push.
#
# Configurable with mediawiki.dumbPush, or per-remote with
# remote.<remotename>.dumbPush.
#
# This means the user will have to re-import the just-pushed
# revisions. On the other hand, this means that the Git revisions
# corresponding to MediaWiki revisions are all imported from the wiki,
# regardless of whether they were initially created in Git or from the
# web interface, hence all users will get the same history (i.e. if
# the push from Git to MediaWiki loses some information, everybody
# will get the history with information lost). If the import is
# deterministic, this means everybody gets the same sha1 for each
# MediaWiki revision.
my $dumb_push = run_git("config --get --bool remote.${remotename}.dumbPush");
if (!$dumb_push) {
$dumb_push = run_git('config --get --bool mediawiki.dumbPush');
}
chomp($dumb_push);
$dumb_push = ($dumb_push eq 'true');
my $wiki_name = $url;
$wiki_name =~ s{[^/]*://}{};
# If URL is like http://user:password@example.com/, we clearly don't
# want the password in $wiki_name. While we're there, also remove user
# and '@' sign, to avoid author like MWUser@HTTPUser@host.com
$wiki_name =~ s/^.*@//;
|