X-Git-Url: https://git.openstreetmap.org./rails.git/blobdiff_plain/61e51f88cc0242dadd3939cd0bbb5197a21cfb94..ca77bb4f7a814d160f3d7d314c09acf160f199e4:/script/misc/update-wiki-pages diff --git a/script/misc/update-wiki-pages b/script/misc/update-wiki-pages index 7cd40e59e..d40708c96 100755 --- a/script/misc/update-wiki-pages +++ b/script/misc/update-wiki-pages @@ -3,20 +3,11 @@ use 5.010; use strict; use warnings; -use Pod::Usage (); -use Getopt::Long (); - -BEGIN { - eval "require MediaWiki::API; require YAML::XS;" or do { - print "You have to install some modules via CPAN to run this:\n"; - print " sudo cpanp MediaWiki::API YAML::XS\n"; - exit 1; - }; -} - +use Getopt::Long; +use Pod::Usage; use MediaWiki::API; +use Test::More qw(no_plan); use YAML::XS qw(Dump); -use Test::More 'no_plan'; =head1 NAME @@ -50,7 +41,9 @@ help() unless -f $out_file; # Get a API interface my $mw = MediaWiki::API->new(); ok($mw, "Got a MediaWiki API"); -$mw->{config}->{api_url} = 'http://wiki.openstreetmap.org/w/api.php'; +$mw->{config}->{api_url} = 'https://wiki.openstreetmap.org/w/api.php'; +$mw->{config}->{retries} = 5; +$mw->{config}->{retry_delay} = 30; # All our goodies my (%feature, %count); @@ -63,9 +56,16 @@ for my $lang ('', map { "${_}:" } qw[ Pt Fi De It HU Cz Fr RU Pl ]) { # Key pages ok(1, " Getting key pages"); my $cnt = stick_content_in_hash("key", "Template:${lang}KeyDescription", \%feature); + $cnt += stick_content_in_hash("key", "Template:${lang}Feature", \%feature); ok(1, " Got $cnt key pages"); $count{key} += $cnt; + # Key prefix pages + ok(1, " Getting key prefix pages"); + $cnt = stick_content_in_hash("key", "Template:${lang}KeyPrefixDescription", \%feature); + ok(1, " Got $cnt key prefix pages"); + $count{keyprefix} += $cnt; + # Value pages ok(1, " Getting value pages"); $cnt = stick_content_in_hash("tag", "Template:${lang}ValueDescription", \%feature); @@ -73,7 +73,7 @@ for my $lang ('', map { "${_}:" } qw[ Pt Fi De It HU Cz Fr RU Pl ]) { $count{value} += $cnt; } -ok(1, "Got a total of $count{$_} ${_}s") for qw[ key value ]; +ok(1, "Got a total of $count{$_} ${_}s") for qw[ key keyprefix value ]; # Dump to .yml file open my $out, ">", $out_file or die "Can't open file '$out_file' supplied on the command line"; @@ -97,29 +97,54 @@ sub stick_content_in_hash }; my $count = 0; + + my $process_link = sub { + my $link = shift; + $count++; + ok(1, " ... got $count links") if $count % 200 == 0; + my $title = $link->{title}; + my $lang; + my $key_name; + if ($title =~ /^$ukey:(?.*?)$/) { + # English by default + $lang = "en"; + $key_name = $space_to_underscore->($+{key_name}); + } elsif ($title =~ /^(?[^:]+):$ukey:(?.*?)$/) { + $lang = lc $+{lang}; + $key_name = $space_to_underscore->($+{key_name}); + } + if ($lang && !exists($hash->{$lang}->{$key}->{$key_name})) { + $hash->{$lang}->{$key}->{$key_name} = $title; + } + }; + get_embeddedin( $title, sub { - my ($links) = @_; - my (@links) = @$links; - ok(1, " ... got " . scalar(@links) . " more links"); - for my $link (@links) { - $count++; - my $title = $link->{title}; - - if ($title =~ /^$ukey:(?.*?)$/) { - # English by default - $hash->{en}->{$key}->{ $space_to_underscore->($+{key_name}) } = $title; - } elsif ($title =~ /^(?[^:]+):$ukey:(?.*?)$/) { - $hash->{lc $+{lang}}->{$key}->{ $space_to_underscore->($+{key_name}) } = $title; + my $link = shift; + $process_link->($link); + get_redirects( + $link->{title}, + sub { + my $link = shift; + $process_link->($link) if exists($link->{redirect}); } - } + ); } ); return $count; } +sub process_list +{ + my $callback = shift; + my $links = shift; + for my $link (@$links) { + $callback->($link); + } +} + sub get_embeddedin { my ($title, $callback) = @_; @@ -135,7 +160,28 @@ sub get_embeddedin }, { max => '0', - hook => $callback, + hook => sub { process_list($callback, @_) }, + skip_encoding => 1, + } + ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details}; +} + +sub get_redirects +{ + my ($title, $callback) = @_; + my $articles = $mw->list( + { + action => 'query', + list => 'backlinks', + bltitle => $title, + blfilterredir => 'redirects', + # Doesn't work for De:* and anything non-en. Odd. + # einamespace => '0|8', + bllimit => '200', + }, + { + max => '0', + hook => sub { process_list($callback, @_) }, skip_encoding => 1, } ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};