#!/usr/bin/perl
#
# Wikipedia bot script. It logs in to the configured wiki, lists the pages
# that embed the template named in $title, and adds IDH parameters to the
# infobox of each of those pages, using the HDI fields found in the
# English-language article linked from the page.
#
use strict;
use warnings;
use utf8;       # this file contains UTF-8 string literals
use locale;

use Encode;
use HTML::Entities;
use LWP::UserAgent;
use POSIX qw(locale_h);
use XML::DOM;

# --- Configuration -------------------------------------------------------
my $username = 'ZX81-bot';
my $password = '********';    # placeholder; never commit the real password
my $lang     = 'fr';          # language code used as the wiki host prefix
my $eilimit  = "5000";        # value sent as the eilimit query parameter

setlocale(LC_COLLATE, "fr_FR.UTF-8");

# Titles and messages contain accented characters: emit them as UTF-8.
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";

# Template whose embedding pages are processed.
my $title = 'Modèle:Infobox Pays';

# Shared state used by the statements further down in the file.
my (@articles, $article);

# --- URL templates (the %s placeholders are filled in with sprintf) ------
# NOTE(review): every request, including the login that carries the
# password, goes over plain http:// -- confirm this is still acceptable.
my $site_url  = 'http://' . $lang . '.wikipedia.org';
my $login_url = $site_url . '/w/index.php?title=Special:Userlogin&action=submitlogin&type=login';
my $query_url = $site_url . '/w/query.php?what=embeddedin&titles=%s&eilimit=%s&eicontfrom=%s&format=xml';
my $edit_url  = $site_url . '/w/index.php?action=edit&title=%s';
my $en_url    = 'http://en.wikipedia.org' . '/w/index.php?action=raw&title=%s';
my $url;

# --- HTTP client ---------------------------------------------------------
my $ua = LWP::UserAgent->new();

# A trailing space makes LWP append its default "libwww-perl/x.y" product
# token, which avoids calling the private _agent method.
$ua->agent('ZX81-bot/0.1 ');

# Cookies are saved to disk so the session set up by the login is reused.
$ua->cookie_jar({ file => "$ENV{HOME}/.wikipedia.cookies", autosave => 1 });
$ua->default_header('Accept-Language' => 'fr, fr-fr, en, en-us');
$ua->default_header('Accept-Charset'  => 'utf-8');

# --- Login ---------------------------------------------------------------
my $rep = $ua->post(
    $login_url,
    [
        wpName         => $username,
        wpPassword     => $password,
        wpRemember     => '1',
        wpLoginAttempt => 'Identification',
    ]
);

# The script treats any answer other than a 302 redirect as a failed login.
die "L'authentification a échoué" if ($rep->code != 302);

# Fixed: the handle was misspelled STDDERR, so this message was never shown.
print STDERR "Auth : Ok\n";
# Collect into @articles the titles of the pages that embed $title, one
# result page at a time. $eicontfrom holds the continuation value for the
# next request; the loop ends once it becomes undef.
my $eicontfrom = "";

# Percent-encode a character string (as UTF-8) so that it can be used
# safely as a query-string value: titles may contain spaces, '&', '#',
# accented letters, etc.
my $escape_query_value = sub {
    my ($value) = @_;
    my $bytes = Encode::encode('UTF-8', $value);
    $bytes =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;
    return $bytes;
};

# A single parser object is reused for every result page.
my $parser = XML::DOM::Parser->new;

while (defined $eicontfrom) {
    $url = sprintf $query_url,
        $escape_query_value->($title),
        $eilimit,
        $escape_query_value->($eicontfrom);
    $rep = $ua->get($url);
    if ($rep->is_error) {
        print STDERR $rep->headers_as_string;
        die "La requête [$url] a échoué.\n", $rep->status_line, "\n";
    }

    my $doc = $parser->parse($rep->content);

    # Keep only the entries whose "ns" attribute is empty, absent or 0.
    for my $ei ($doc->getElementsByTagName("ei")) {
        next if $ei->getAttribute("ns");
        my $title_node = $ei->getFirstChild or next;    # entry without text
        push @articles, $title_node->getNodeValue;
    }

    # Look for a continuation marker: <query><embeddedin next="..."/></query>.
    my $next;
    my $queries = $doc->getElementsByTagName("query");
    if ($queries->getLength != 0) {
        my $embedded = $queries->item(0)->getElementsByTagName("embeddedin");
        if ($embedded->getLength != 0) {
            $next = $embedded->item(0)->getAttribute("next");
        }
    }

    # getAttribute returns the empty string when the attribute is missing.
    # Treat that as "no more pages"; otherwise the loop would request the
    # first page again forever.
    $eicontfrom = (defined $next && length $next) ? $next : undef;

    # XML::DOM trees contain circular references; release them explicitly.
    $doc->dispose;
}
# For every collected article: fetch its edit form, skip it if the infobox
# already has an IDH parameter, otherwise read the HDI fields from the
# English article linked by the [[en:...]] interwiki link, insert the IDH
# parameters just before the "monnaie" parameter and post the edit.

# Output autoflush stays off (0 is Perl's default value for $|).
$| = 0;

# Percent-encode a page title (as UTF-8) for use in a URL query string.
my $escape_title = sub {
    my ($value) = @_;
    my $bytes = Encode::encode('UTF-8', $value);
    $bytes =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;
    return $bytes;
};

# English HDI category keyword => French label written to the infobox.
my %fr_category = (
    high   => 'élevé',
    medium => 'moyen',
    low    => 'bas',
);

# The form is posted as UTF-8 bytes, so encode the edit summary once.
my $summary = Encode::encode('UTF-8',
    'Robot : ajout des paramètres IDH dans {{Infobox Pays}}');

for my $article (@articles) {
    print "fr:$article : GET\n";
    my $article_url = sprintf $edit_url, $escape_title->($article);
    $url = $article_url;
    $rep = $ua->get($url);
    if ($rep->is_error) {
        print STDERR $rep->headers_as_string;
        die "La requête [$url] a échoué.\n", $rep->status_line, "\n";
    }
    my $page = Encode::decode('UTF-8', $rep->content);

    # Pull the hidden form fields and the wikitext out of the edit form.
    # List assignment leaves a variable undef when its pattern does not
    # match, instead of silently reusing a stale $1 from an earlier match.
    my ($token) = $page =~ m{<input type='hidden' value="([^"]*)" name="wpEditToken" />}s;
    my ($time)  = $page =~ m{<input type='hidden' value="([^"]*)" name="wpEdittime" />}s;
    # Non-greedy, so the capture stops at the first closing tag.
    my ($text)  = $page =~ m{<textarea [^>]*name="wpTextbox1"[^>]*>(.*?)</textarea>}s;
    unless (defined $token && defined $time && defined $text) {
        # Never post an edit built from an incomplete form.
        warn "fr:$article : formulaire d'édition incomplet, page ignorée\n";
        next;
    }
    HTML::Entities::decode($text);    # decodes $text in place

    # The infobox already carries an IDH parameter: nothing to do.
    if ($text =~ /\|\s*IDH\s*=/s) {
        print "fr:$article : passé\n";
        next;
    }

    # The values stay empty when there is no English article or no HDI data.
    my ($hdi, $hdi_year, $hdi_rank, $hdi_category) = ("", "", "", "");
    if ($text =~ /\[\[[ _]*en[ _]*:[ _]*([^\]]+)[ _]*\]\]/) {
        my $enarticle = $1;
        print "en:$enarticle : GET\n";
        $url = sprintf $en_url, $escape_title->($enarticle);
        $rep = $ua->get($url);
        if ($rep->is_error) {
            print STDERR $rep->headers_as_string;
            die "La requête [$url] a échoué.\n", $rep->status_line, "\n";
        }
        my $en_page = Encode::decode('UTF-8', $rep->content);

        # Optional leading {{template}}, then the index written with a
        # decimal point; the output uses a decimal comma.
        if ($en_page =~ /\|\s*HDI\s*=\s*(\{\{[^\}]+\}\})?\s*(\d+)\.(\d+)/) {
            my $prefix = defined $1 ? "$1 " : "";
            $hdi = "$prefix$2,$3";
        }

        # First run of digits on the value's line. Requiring at least one
        # digit lets the lazy prefix skip leading non-digit text; the
        # former (\d*) always matched the empty string there.
        if ($en_page =~ /\|\s*HDI_rank\s*=\s*[^|\n]*?(\d+)/) {
            $hdi_rank = $1 . '{{e}}';
        }

        if ($en_page =~ /\|\s*HDI_year\s*=\s*(\d+)/) {
            $hdi_year = $1;
        }

        if ($en_page =~ /\|\s*HDI_category\s*=\s*[^|]*?(low|medium|high)/) {
            $hdi_category = $fr_category{$1};
        }
    }

    my $idh = join '',
        "| IDH=$hdi\n",
        "| IDH_année=$hdi_year\n",
        "| IDH_catégorie=$hdi_category\n",
        "| IDH_rang=$hdi_rank\n";

    # Insert the new parameters right before "monnaie". Without that
    # anchor the text is unchanged, so do not post a pointless edit.
    unless ($text =~ s/\|\s*(monnaie\s*=)/$idh| $1/s) {
        warn "fr:$article : paramètre monnaie introuvable, page ignorée\n";
        next;
    }

    # NOTE(review): only transport-level errors are detected below; a
    # response that shows the edit form again would not be noticed --
    # confirm what the wiki returns for a rejected edit.
    $url = $article_url;
    $rep = $ua->post(
        $url,
        [
            wpTextbox1  => Encode::encode('UTF-8', $text),
            wpMinoredit => '1',
            wpSummary   => $summary,
            wpEdittime  => $time,
            wpEditToken => $token,
        ]
    );
    if ($rep->is_error) {
        print STDERR $rep->headers_as_string;
        die "La requête [$url] a échoué.\n", $rep->status_line, "\n";
    }
}