9 eval "require MediaWiki::API; require YAML::XS;" or do {
10 print "You have to install some modules via CPAN to run this:\n";
11 print " sudo cpanp MediaWiki::API YAML::XS\n";
17 use YAML::XS qw(Dump);
18 use Test::More 'no_plan';
22 update-key-and-tag-description-pages-from-wiki - Screen-scrape the wiki for key/value wiki description pages
26 perl script/misc/update-key-and-tag-description-pages-from-wiki config/wiki-tag-and-key-description.yml
30 This will break if there are more than 500 key or value pages. Paging
31 needs to be implemented.
33 That or using a proper API or something (if it's there) or making a
34 direct query to the wiki database.
38 # Get the command-line options
39 Getopt::Long::Parser->new(
40 config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
42 'h|help' => \my $help,
48 help() unless $ARGV[0];
51 my $mw = MediaWiki::API->new();
52 ok($mw, "Got a MediaWiki API");
53 $mw->{config}->{api_url} = 'http://wiki.openstreetmap.org/w/api.php';
56 my (%feature, %count);
58 # This is what you get on:
59 ## http://wiki.openstreetmap.org/w/index.php?search=Template:KeyDescription&fulltext=Search&fulltext=Search
60 for my $lang ('', map { "${_}:" } qw[ Pt Fi De It HU Cz Fr RU Pl ]) {
61 ok(1, " Templates for language '$lang'");
64 for my $thing (qw(key value)) {
65 my $Thing = ucfirst $thing;
66 ok(1, " Getting $thing pages");
67 my $cnt = stick_content_in_hash($thing, "Template:${lang}${Thing}Description", \%feature);
68 ok(1, " Got $cnt $thing pages");
69 $count{$thing} += $cnt;
73 ok(1, "Got a total of $count{$_} ${_}s") for qw[ key value ];
# Write the collected %feature structure as YAML to the file named by the
# first command-line argument; include the OS error ($!) if it cannot be opened.
76 open my $out, ">", $ARGV[0] or die "Can't open file '$ARGV[0]' supplied on the command line: $!";
77 say $out "# THIS FILE IS AUTOGENERATED WITH THE script/misc/update-key-and-tag-description-pages-from-wiki";
78 say $out "# PROGRAM DO NOT MANUALLY EDIT IT";
80 say $out Dump(\%feature);
85 sub stick_content_in_hash
87 my ($key, $title, $hash) = @_;
88 my $ukey = ucfirst $key;
90 my $space_to_underscore = sub {
101 my (@links) = @$links;
102 ok(1, " ... got " . scalar(@links) . " more links");
103 for my $link (@links) {
105 my $title = $link->{title};
107 if ($title =~ /^$ukey:(?<key_name>.*?)$/) {
109 $hash->{en}->{$key}->{ $space_to_underscore->($+{key_name}) } = $title;
110 } elsif ($title =~ /^(?<lang>[^:]+):$ukey:(?<key_name>.*?)$/) {
111 $hash->{lc $+{lang}}->{$key}->{ $space_to_underscore->($+{key_name}) } = $title;
122 my ($title, $callback) = @_;
123 my $articles = $mw->list(
126 list => 'embeddedin',
128 eifilterredir => 'nonredirects',
129 # Doesn't work for De:* and anything non-en. Odd.
130 # einamespace => '0|8',
138 ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
145 Pod::Usage::pod2usage(
146 -verbose => $arg{ verbose },
147 -exitval => $arg{ exitval } || 0,