5 use File::Slurp qw(slurp);
6 use YAML::Syck qw(Dump Load LoadFile DumpFile);
8 $YAML::Syck::Headless = 1;
9 $YAML::Syck::SortKeys = 1;
12 use HTML::TableParser::Grid;
15 use Data::Dump 'dump';
16 use File::Spec::Functions qw(catfile);
22 merge-from-translatewiki - Get new translations from L<http://translatewiki.net> and selectively merge them with ours
26 # Run this normally, hopefully...
27 merge-from-translatewiki --locales-dir=config/locales
29 # Diff the existing files:
30 config/locales$ for i in $(ls *yml | grep -v en.yml); do perl ../../script/locale/diff --dump-flat $i > $i.0 ;done
32 # Merge and find out what changed:
33 rails_port$ perl script/locale/merge-from-translatewiki --locales-dir config/locales
36 rails_port$ for i in $(svn st config/locales/ | egrep '^M|\\?' | awk '{print $2}' | grep 'yml$'); do rm -v $i; done && svn up config/locales && perl script/locale/merge-from-translatewiki --locales-dir config/locales && svn st config/locales
39 config/locales$ for i in $(ls *yml | grep -v en.yml); do perl ../../script/locale/diff --dump-flat $i > $i.1 ;done && for i in $(ls *yml | grep -v en.yml); do diff -ru $i.*; done
43 Translatewiki's export process L<is
44 broken|http://trac.openstreetmap.org/ticket/2305>. This script imports
45 new messages from it while tiptoeing around known bugs.
53 Print this help message.
57 The locales dir we'll merge stuff into. F<config/locales> by default.
61 Only import translations that don't exist for us yet.
67 E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avarab@gmail.com>
# Command-line handling. Three options are bound to lexicals declared inline:
# -h/--help, --locales-dir (takes a string, defaults to 'config/locales') and
# --only-new.
71 # Get the command-line options
72 Getopt::Long::Parser->new(
73 config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
75 'h|help' => \my $help,
76 'locales-dir=s' => \(my $locales_dir = 'config/locales'),
77 'only-new' => \my $only_new,
# Fall back to help() when the locales directory value is false or does not
# name an existing directory.
83 help() unless $locales_dir and -d $locales_dir;
89 ### Get Translatewiki data
# translatewiki_languages() yields code => language pairs, collected into a hash here.
90 my %translatewiki_languages = translatewiki_languages();
92 # Don't process English from Translatewiki
93 delete $translatewiki_languages{en};
95 #say Dump \%translatewiki_languages;
# Only the language codes are needed to fetch the per-language translations.
97 my @translatewiki_languages_codes = keys %translatewiki_languages;
98 my %translatewiki_translations = get_translatewiki_translations(@translatewiki_languages_codes);
100 #say Dump \%translatewiki_translations;
102 ### Get our existing data
# Every *.yml file in the locales directory is loaded, flattened and stored in
# %my_translations (its declaration is not visible in this excerpt).
104 my @my_yaml_files = glob catfile($locales_dir, '*.yml');
105 for my $my_yaml_file (@my_yaml_files) {
106 my $basename = basename($my_yaml_file);
# The key is lower-cased; presumably so that it lines up with the
# Translatewiki language codes used in the merge loop -- confirm.
107 my $tw_lang = lc $basename;
109 say STDERR "Loading my translation $tw_lang ($my_yaml_file)";
# slurp is forced into scalar context so the whole file arrives as one string.
110 $my_translations{$tw_lang} = load_and_flatten_yaml(scalar slurp($my_yaml_file));
113 say "loaded my translations";
115 ## Write out merged data
116 for my $translatewiki_lang (sort @translatewiki_languages_codes) {
# Derive the local file name: the first run of word characters after a hyphen
# is upper-cased, then '.yml' is appended inside the locales directory.
117 my $rails_lang = $translatewiki_lang; $rails_lang =~ s/(?<=-)(\w+)/\U$1\E/;
118 my $out_file = catfile($locales_dir, $rails_lang . '.yml');
120 unless (-f $out_file) {
121 # No translation like this exists
122 say STDERR "$rails_lang has no existing translation. Importing as-is from Translatewiki to $out_file";
123 my $expanded = expand_hash($translatewiki_translations{$translatewiki_lang});
124 my $out = +{ $rails_lang => $expanded };
125 spit_out($out_file, $out);
126 } elsif (ref $my_translations{$translatewiki_lang} eq 'HASH' and not $only_new) {
# Merge branch: the file exists, a flattened hash was loaded for it, and
# --only-new was not given. With --only-new and an existing file neither this
# branch nor the final one runs, so the file is left untouched.
127 say STDERR "$rails_lang has existing translations. Merging the old translation with the new Translatewiki one";
# Work on shallow copies of the flattened (dotted-key) hashes.
130 my %tw = %{ $translatewiki_translations{$translatewiki_lang} };
131 my %me = %{ $my_translations{$translatewiki_lang} };
132 my %en = %{ $my_translations{en} };
133 # Use %tw to start with
# NOTE(review): %new is used below but its declaration is not visible in this
# excerpt; per the comment above it is presumably initialised from %tw -- confirm.
138 ## These keys shouldn't be removed but are due to
139 ## Translatewiki fail (they were missing in the original
142 browse.relation_member.entry
143 changeset.changeset.id
144 geocoder.search_osm_namefinder.suffix_suburb
146 layouts.intro_3_bytemark
148 layouts.project_name.h1
149 layouts.project_name.title
150 printable_name.with_version
152 layouts.help_wiki_url
154 notifier.gpx_notification.failure.import_failures_url
155 notifier.signup_confirm_plain.the_wiki_url
156 notifier.signup_confirm_plain.wiki_signup_url
157 trace.edit.visibility_help_url
158 trace.trace_form.help_url
159 trace.trace_form.visibility_help_url
162 for my $key (@url_keys) {
# Restore a key we already have locally when the merged set lacks it, but only
# if our value differs from the English one.
# NOTE(review): $en{$key} may be undefined if English lacks the key, in which
# case `ne` compares against undef -- confirm this cannot happen.
163 if ( exists $me{$key} and not exists $new{$key} ) {
164 $new{$key} = $me{$key} if $me{$key} ne $en{$key};
168 ## When foo exists in this file but only foo.one, foo.other
169 ## etc in English or the original file we don't want to throw away what we have
170 my @plural_keys = qw( zero one many few other two );
# NOTE(review): @plural_keys is not referenced in the lines visible here; the
# condition below spells out each suffix by hand.
172 while (my ($me_k, $me_v) = each %me) {
173 if (not exists $tw{ $me_k } and
174 not exists $en{ $me_k } and
176 exists $en{ $me_k . '.zero' } or
177 exists $en{ $me_k . '.one' } or
178 exists $en{ $me_k . '.many' } or
179 exists $en{ $me_k . '.few' } or
180 exists $en{ $me_k . '.other' } or
181 exists $en{ $me_k . '.two' })) {
182 #say STDERR "Bringing back nuked plural form '$me_k' Setting it to '$me{ $me_k }'";
183 $new{ $me_k } = $me{ $me_k };
187 # Both arrays and strings are supported in the site key. Avoid removing e.g.:
188 # -site.key.table.entry.school: 學校;大學
189 # Just because en.yml has site.key.table.entry.school.0 and site.key.table.entry.school.1
190 while (my ($me_k, $me_v) = each %me) {
# Only site.key.table.entry.* keys that do not end in a numeric index are considered.
191 next unless $me_k =~ /^site\.key\.table\.entry/;
192 next if $me_k =~ /\.\d+$/;
# Keep our plain-string value when the English value is an array reference.
194 if (ref $en{ $me_k } eq 'ARRAY' and not ref $me{ $me_k }) {
195 $new{ $me_k } = $me{ $me_k };
199 # There are a bunch of keys on Translatewiki that are
200 # equivalent to English for some reason. Probably because they
201 # were there at import time. Nuke them.
202 while (my ($new_k, $new_v) = each %new) {
# Only the key most recently returned by each() is deleted, which is the case
# perldoc documents as safe while iterating.
203 if (exists $en{ $new_k } and $en{ $new_k } eq $new_v) {
204 say "Purging dupe in $rails_lang: $new_k=$new_v";
205 delete $new{ $new_k };
# Re-nest the flat dotted keys under the language code and write the file.
209 my $expanded = expand_hash( \%new );
210 my $out = +{ $rails_lang => $expanded };
211 spit_out($out_file, $out);
212 } elsif (not $only_new) {
# Reached when the file exists, no hash was loaded for this language, and
# --only-new was not given.
213 die "Internal error on $translatewiki_lang";
# Visible part of the YAML writer (its sub header is not shown in this excerpt;
# presumably this is spit_out, called as spit_out($out_file, $out) -- confirm).
# Takes a target path and a data structure, serialises the data with Dump and
# opens the path for writing.
219 my ($file, $data) = @_;
220 my $yaml_out = Dump $data;
# NOTE(review): the result of open is not checked on the next line; confirm
# whether autodie is in effect or whether a failure would pass silently.
222 open my $fh, ">", $file;
# Runs utf8::decode on every value of %$hash; when a value is an array
# reference, each of its elements is decoded instead. map is used here for its
# side effects on the aliased values.
236 map { if (ref $_ eq 'ARRAY') { map { utf8::decode($_) } @$_ } else { utf8::decode($_) } } values %$hash;
# Visible part of the recursive flattener. Arguments: a hash reference followed
# by the list of keys that lead to it (@path).
241 my ($hash, @path) = @_;
244 while (my ($k, $v) = each %$hash)
246 if (ref $v eq 'HASH')
# Nested hash: recurse with the current key appended to the path.
248 push @ret => iterate($v, @path, $k);
# Anything else: emit a "dotted.path.key" => value pair.
252 push @ret => join(".",@path, $k), $v;
# Visible part of the inverse of the flattening step: takes a reference to a
# flat hash and inserts each key/value into %new_hash via insert_string_deep
# (%new_hash's declaration is not shown in this excerpt).
261 my ($flat_hash) = @_;
263 while (my ($k, $v) = each %$flat_hash) {
264 #say "Inserting $k=$v";
265 insert_string_deep(\%new_hash, $k, $v);
271 # Fails under strict in certain cases:
272 ## Inserting browse.start_rjs.object_list.history.type.way=Vía [[id]]
273 ## Inserting activerecord.models.relation_tag=Etiqueta de la relación
274 ## Inserting browse.changeset_details.has_nodes.one=Tiene el siguiente {{count}} nodo:
275 ## Can't use string ("Tiene {{count}} nodos:") as a HASH ref while "strict refs" in use at script/locale/merge-from-translatewiki line 234.
276 # Line 234 = my $p = \$h; $p = \$$p->{$_} for split /\./, $ks;
278 # sub insert_string_deep_X {
279 # my ($h, $ks, $v) = @_;
280 # my $p = \$h; $p = \$$p->{$_} for split /\./, $ks;
# insert_string_deep($hash, $key, $value)
# Takes a hash reference, a dotted key string and a value. The key is split on
# literal dots into its components; the rest of the body is not visible in
# this excerpt.
284 sub insert_string_deep
286 my ($hash, $key, $value) = @_;
288 my @key = split /\./, $key;
307 # Get language from Translatewiki
# get_translatewiki_translations(@languages)
# Downloads and flattens the translation for each language code, stores the
# result in a Storable cache file and returns a hash of
# language code => flattened translation hash.
310 sub get_translatewiki_translations
# NOTE(review): the cache lives at a fixed, predictable path under /tmp and is
# read back with Storable's retrieve, which the Storable documentation warns
# against using on data from untrusted sources -- consider a private cache
# location.
314 my $cache_file = "/tmp/merge-from-translatewiki.storable";
315 if (-f $cache_file) {
# What is done with $cache after loading is not visible in this excerpt.
316 my $cache = retrieve($cache_file);
320 my %translatewiki_languages;
321 my $all_count = scalar @languages;
322 say "Translatewiki has $all_count languages I'm about to get";
324 for my $lang (@languages) {
# $count is declared/updated on lines not shown here.
326 say STDERR "Getting language $count/$all_count ($lang) from Translatewiki";
327 my $yaml = get_language_from_translatewiki($lang);
329 my $flat_data = load_and_flatten_yaml($yaml);
331 $translatewiki_languages{$lang} = $flat_data;
# Persist everything fetched so the cache file exists for a later run.
334 store \%translatewiki_languages, $cache_file;
336 return %translatewiki_languages;
# get_language_from_translatewiki
# Fetches the export for one language ($lang) from Translatewiki's
# Special:Translate export-to-file task (group out-osm) and returns the
# response content. Dies if the request was not successful.
339 sub get_language_from_translatewiki
342 my $mech = WWW::Mechanize->new;
# NOTE(review): $lang is interpolated into the query string without escaping.
344 $mech->get("http://translatewiki.net/w/i.php?title=Special%3ATranslate&task=export-to-file&group=out-osm&language=$lang");
# NOTE(review): the message below repeats the word "lang" ("lang $lang lang").
346 die "Couldn't get lang $lang lang from Translatewiki" unless $mech->success;
348 return $mech->content;
# translatewiki_languages
# Fetches the OpenStreetMap statistics page from Translatewiki, extracts the
# sortable HTML table and maps each parsed row to a code => language pair.
# Dies if the page could not be fetched.
355 sub translatewiki_languages
357 my $mech = WWW::Mechanize->new;
359 $mech->get('http://translatewiki.net/wiki/Translating:OpenStreetMap/stats/trunk');
361 die "Couldn't get translatewiki table" unless $mech->success;
363 my $content = $mech->content;
# The .* is greedy and /s lets it cross newlines, so the capture runs from the
# first '<table class="sortable' to the LAST '</table>' in the page.
# NOTE(review): $sortable is undef when the pattern does not match; no check is
# visible here.
364 my ($sortable) = $content =~ m[(<table class="sortable.*</table>)]s;
366 my @table = parse_language_table($sortable);
# Flat list of code => language pairs; this is the last visible statement, so
# presumably the sub's return value -- confirm.
369 map { $_->{code} => $_->{language} } @table;
# parse_language_table
# Parses an HTML table ($table) with HTML::TableParser::Grid and walks every
# row by index. How the rows are collected and returned is not visible in this
# excerpt.
372 sub parse_language_table
376 my $parser = HTML::TableParser::Grid->new($table);
378 for my $n (0 .. $parser->num_rows - 1) {
# Hash-slice assignment: the row's cells are assigned in order to the keys
# code, language, done and fuzzy.
380 @row{qw(code language done fuzzy)} = $parser->row($n);
# Removes everything from the first '.' to the end of $name. Presumably part of
# the local basename helper used when loading the locale files -- confirm; the
# enclosing sub is not visible in this excerpt.
396 $name =~ s[\..*?$][];
# load_and_flatten_yaml
# Parses a YAML string ($yaml), strips its single root key, flattens the nested
# structure into dotted keys via iterate() and passes the result to mark_utf8().
# Dies unless the document has exactly one root key.
401 sub load_and_flatten_yaml
405 my $data = Load($yaml);
407 # Remove the root $lang => key
408 my @keys = keys %$data;
# NOTE(review): the check is "!= 1", so this also fires for a document with no
# root keys even though the message says "more than 1".
409 die "YAML data had more than 1 root key" if @keys != 1;
410 $data = $data->{$keys[0]};
# iterate() returns a flat key/value list, wrapped here into a hash reference.
413 my $flat_data = { iterate($data) };
415 mark_utf8($flat_data);
# Visible part of the usage printer: calls pod2usage with the verbosity from
# $arg{verbose} and an exit value of $arg{exitval}, or 0 when that is false.
# %arg is populated on lines not shown in this excerpt.
428 Pod::Usage::pod2usage(
429 -verbose => $arg{ verbose },
430 -exitval => $arg{ exitval } || 0,