9 use Test::More qw(no_plan);
10 use YAML::XS qw(Dump);
14 update-wiki-pages - Scrape the OpenStreetMap wiki for key/value description pages
18 perl script/misc/update-wiki-pages config/wiki_pages.yml
22 prove -e 'perl script/misc/update-wiki-pages' config/wiki_pages.yml
26 # Get the command-line options
27 Getopt::Long::Parser->new(
28 config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
30 'h|help' => \my $help,
36 my $out_file = $ARGV[0];
37 $out_file //= 'config/wiki_pages.yml';
39 help() unless -f $out_file;
42 my $mw = MediaWiki::API->new();
43 ok($mw, "Got a MediaWiki API");
44 $mw->{config}->{api_url} = 'https://wiki.openstreetmap.org/w/api.php';
45 $mw->{config}->{retries} = 5;
46 $mw->{config}->{retry_delay} = 30;
49 my (%feature, %count);
51 # This is what you get from the following wiki search:
52 ## https://wiki.openstreetmap.org/w/index.php?search=Template:KeyDescription&fulltext=Search
53 for my $lang ('', map { "${_}:" } qw[ Pt Fi De It HU Cz Fr RU Pl ]) {
54 ok(1, " Templates for language '$lang'");
57 ok(1, " Getting key pages");
58 my $cnt = stick_content_in_hash("key", "Template:${lang}KeyDescription", \%feature);
59 $cnt += stick_content_in_hash("key", "Template:${lang}Feature", \%feature);
60 ok(1, " Got $cnt key pages");
64 ok(1, " Getting value pages");
65 $cnt = stick_content_in_hash("tag", "Template:${lang}ValueDescription", \%feature);
66 ok(1, " Got $cnt value pages");
67 $count{value} += $cnt;
70 ok(1, "Got a total of $count{$_} ${_}s") for qw[ key value ];
73 open my $out, ">", $out_file or die "Can't open file '$out_file' supplied on the command line";
74 say $out "# THIS FILE IS AUTOGENERATED WITH THE script/misc/update-wiki-pages";
75 say $out "# PROGRAM DO NOT MANUALLY EDIT IT";
77 say $out Dump(\%feature);
82 sub stick_content_in_hash
84 my ($key, $title, $hash) = @_;
85 my $ukey = ucfirst $key;
87 my $space_to_underscore = sub {
95 my $process_link = sub {
98 ok(1, " ... got $count links") if $count % 200 == 0;
99 my $title = $link->{title};
102 if ($title =~ /^$ukey:(?<key_name>.*?)$/) {
105 $key_name = $space_to_underscore->($+{key_name});
106 } elsif ($title =~ /^(?<lang>[^:]+):$ukey:(?<key_name>.*?)$/) {
108 $key_name = $space_to_underscore->($+{key_name});
110 if ($lang && !exists($hash->{$lang}->{$key}->{$key_name})) {
111 $hash->{$lang}->{$key}->{$key_name} = $title;
119 $process_link->($link);
124 $process_link->($link) if exists($link->{redirect});
135 my $callback = shift;
137 for my $link (@$links) {
144 my ($title, $callback) = @_;
145 my $articles = $mw->list(
148 list => 'embeddedin',
150 eifilterredir => 'nonredirects',
151 # Doesn't work for De:* and anything non-en. Odd.
152 # einamespace => '0|8',
157 hook => sub { process_list($callback, @_) },
160 ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
165 my ($title, $callback) = @_;
166 my $articles = $mw->list(
171 blfilterredir => 'redirects',
172 # Doesn't work for De:* and anything non-en. Odd.
173 # blnamespace => '0|8',
178 hook => sub { process_list($callback, @_) },
181 ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
188 Pod::Usage::pod2usage(
189 -verbose => $arg{ verbose },
190 -exitval => $arg{ exitval } || 0,