From ce404d1afba755ca09ffb62b5d649259da503d4e Mon Sep 17 00:00:00 2001 From: Tom Hughes Date: Wed, 18 Jun 2014 19:47:30 +0100 Subject: [PATCH] Fix update-wiki-pages to resolve redirects correctly --- script/misc/update-wiki-pages | 76 ++++++++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/script/misc/update-wiki-pages b/script/misc/update-wiki-pages index e01e9cae0..be69b2c4a 100755 --- a/script/misc/update-wiki-pages +++ b/script/misc/update-wiki-pages @@ -97,29 +97,54 @@ sub stick_content_in_hash }; my $count = 0; + + my $process_link = sub { + my $link = shift; + $count++; + ok(1, " ... got $count links") if $count % 200 == 0; + my $title = $link->{title}; + my $lang; + my $key_name; + if ($title =~ /^$ukey:(?<key_name>.*?)$/) { + # English by default + $lang = "en"; + $key_name = $space_to_underscore->($+{key_name}); + } elsif ($title =~ /^(?<lang>[^:]+):$ukey:(?<key_name>.*?)$/) { + $lang = lc $+{lang}; + $key_name = $space_to_underscore->($+{key_name}); + } + if ($lang && !exists($hash->{$lang}->{$key}->{$key_name})) { + $hash->{$lang}->{$key}->{$key_name} = $title; + } + }; + get_embeddedin( $title, sub { - my ($links) = @_; - my (@links) = @$links; - ok(1, " ... got " . scalar(@links) . 
" more links"); - for my $link (@links) { - $count++; - my $title = $link->{title}; - - if ($title =~ /^$ukey:(?<key_name>.*?)$/) { - # English by default - $hash->{en}->{$key}->{ $space_to_underscore->($+{key_name}) } = $title; - } elsif ($title =~ /^(?<lang>[^:]+):$ukey:(?<key_name>.*?)$/) { - $hash->{lc $+{lang}}->{$key}->{ $space_to_underscore->($+{key_name}) } = $title; + my $link = shift; + $process_link->($link); + get_redirects( + $link->{title}, + sub { + my $link = shift; + $process_link->($link) if exists($link->{redirect}); } - } + ); } ); return $count; } +sub process_list +{ + my $callback = shift; + my $links = shift; + for my $link (@$links) { + $callback->($link); + } +} + sub get_embeddedin { my ($title, $callback) = @_; @@ -128,14 +153,35 @@ sub get_embeddedin action => 'query', list => 'embeddedin', eititle => $title, - eifilterredir => 'all', + eifilterredir => 'nonredirects', # Doesn't work for De:* and anything non-en. Odd. # einamespace => '0|8', eilimit => '200', }, { max => '0', - hook => $callback, + hook => sub { process_list($callback, @_) }, + skip_encoding => 1, + } + ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details}; +} + +sub get_redirects +{ + my ($title, $callback) = @_; + my $articles = $mw->list( + { + action => 'query', + list => 'backlinks', + bltitle => $title, + blfilterredir => 'redirects', + # Doesn't work for De:* and anything non-en. Odd. + # einamespace => '0|8', + bllimit => '200', + }, + { + max => '0', + hook => sub { process_list($callback, @_) }, skip_encoding => 1, } ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details}; -- 2.39.5