5 use File::Slurp qw(slurp);
6 use YAML::Syck qw(Dump Load LoadFile DumpFile);
8 $YAML::Syck::Headless = 1;
9 $YAML::Syck::SortKeys = 1;
12 use HTML::TableParser::Grid;
15 use Data::Dump 'dump';
16 use File::Spec::Functions qw(catfile);
22 merge-from-translatewiki - Get new translations from L<http://translatewiki.net> and selectively merge them with ours
26 # Run this normally, hopefully...
27 merge-from-translatewiki --locales-dir=config/locales
29 # Diff the existing files:
30 config/locales$ for i in $(ls *yml | grep -v en.yml); do perl ../../script/locale/diff --dump-flat $i > $i.0 ;done
32 # Merge and find out what changed:
33 rails_port$ for i in $(svn st config/locales/ | egrep '^M|\\?' | awk '{print $2}' | grep 'yml$'); do rm -v $i; done && svn up config/locales && perl script/locale/merge-from-translatewiki --locales-dir config/locales && svn st config/locales
36 config/locales$ for i in $(ls *yml | grep -v en.yml); do perl ../../script/locale/diff --dump-flat $i > $i.1 ;done && for i in $(ls *yml | grep -v en.yml); do diff -ru $i.*; done
40 Translatewiki's export process L<is
41 broken|http://trac.openstreetmap.org/ticket/2305>. This script imports
42 new messages from it while tiptoeing around known bugs.
50 Print this help message.
54 The locales dir we'll merge stuff into. E.g. C<config/locales>.
58 Only import translations that don't exist for us yet.
64 E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avarab@gmail.com>
68 # Get the command-line options
69 Getopt::Long::Parser->new(
70 config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
72 'h|help' => \my $help,
73 'locales-dir=s' => \my $locales_dir,
74 'only-new' => \my $only_new,
80 help() unless $locales_dir;
86 ### Get Translatewiki data
87 my %translatewiki_languages = translatewiki_languages();
89 # Don't process English from Translatewiki
90 delete $translatewiki_languages{en};
92 #say Dump \%translatewiki_languages;
94 my @translatewiki_languages_codes = keys %translatewiki_languages;
95 my %translatewiki_translations = get_translatewiki_translations(@translatewiki_languages_codes);
97 #say Dump \%translatewiki_translations;
99 ### Get our existing data
101 my @my_yaml_files = glob catfile($locales_dir, '*.yml');
102 for my $my_yaml_file (@my_yaml_files) {
103 my $basename = basename($my_yaml_file);
104 my $tw_lang = lc $basename;
106 say STDERR "Loading my translation $tw_lang ($my_yaml_file)";
107 $my_translations{$tw_lang} = load_and_flatten_yaml(scalar slurp($my_yaml_file));
110 say "loaded my translations";
112 ## Write out merged data
113 for my $translatewiki_lang (sort @translatewiki_languages_codes) {
114 my $rails_lang = $translatewiki_lang; $rails_lang =~ s/(?<=-)(\w+)/\U$1\E/;
115 my $out_file = catfile($locales_dir, $rails_lang . '.yml');
117 unless (-f $out_file) {
118 # No translation like this exists
119 say STDERR "$rails_lang has no existing translation. Importing as-is from Translatewiki to $out_file";
120 my $expanded = expand_hash($translatewiki_translations{$translatewiki_lang});
121 my $out = +{ $rails_lang => $expanded };
122 spit_out($out_file, $out);
123 } elsif (ref $my_translations{$translatewiki_lang} eq 'HASH' and not $only_new) {
124 say STDERR "$rails_lang has existing translations. Merging the old translation with the new Translatewiki one";
127 my %tw = %{ $translatewiki_translations{$translatewiki_lang} };
128 my %me = %{ $my_translations{$translatewiki_lang} };
129 my %en = %{ $my_translations{en} };
130 # Use %tw to start with
135 ## These keys shouldn't be removed but are due to
136 ## Translatewiki fail (they were missing in the original
139 browse.relation_member.entry
140 changeset.changeset.id
141 geocoder.search_osm_namefinder.suffix_suburb
143 layouts.intro_3_bytemark
145 layouts.project_name.h1
146 layouts.project_name.title
147 printable_name.with_version
149 layouts.help_wiki_url
151 notifier.gpx_notification.failure.import_failures_url
152 notifier.signup_confirm_plain.the_wiki_url
153 notifier.signup_confirm_plain.wiki_signup_url
154 trace.edit.visibility_help_url
155 trace.trace_form.help_url
156 trace.trace_form.visibility_help_url
159 for my $key (@url_keys) {
160 if ( exists $me{$key} and not exists $new{$key} ) {
161 $new{$key} = $me{$key} if $me{$key} ne $en{$key};
165 ## When foo exists in this file but only foo.one, foo.other
166 ## etc in English or the original file we don't want to throw away what we have
167 my @plural_keys = qw( zero one many few other two );
169 while (my ($me_k, $me_v) = each %me) {
170 if (not exists $tw{ $me_k } and
171 not exists $en{ $me_k } and
173 exists $en{ $me_k . '.zero' } or
174 exists $en{ $me_k . '.one' } or
175 exists $en{ $me_k . '.many' } or
176 exists $en{ $me_k . '.few' } or
177 exists $en{ $me_k . '.other' } or
178 exists $en{ $me_k . '.two' })) {
179 #say STDERR "Bringing back nuked plural form '$me_k' Setting it to '$me{ $me_k }'";
180 $new{ $me_k } = $me{ $me_k };
184 # Both arrays and strings are supported in the site key. Avoid removing e.g.:
185 # -site.key.table.entry.school: 學校;大學
186 # Just because en.yml has site.key.table.entry.school.0 and site.key.table.entry.school.1
187 while (my ($me_k, $me_v) = each %me) {
188 next unless $me_k =~ /^site\.key\.table\.entry/;
189 next if $me_k =~ /\.\d+$/;
191 if (ref $en{ $me_k } eq 'ARRAY' and not ref $me{ $me_k }) {
192 $new{ $me_k } = $me{ $me_k };
196 my $expanded = expand_hash( \%new );
197 my $out = +{ $rails_lang => $expanded };
198 spit_out($out_file, $out);
199 } elsif (not $only_new) {
200 die "Internal error on $translatewiki_lang";
206 my ($file, $data) = @_;
207 my $yaml_out = Dump $data;
209 open my $fh, ">", $file;
223 map { if (ref $_ eq 'ARRAY') { map { utf8::decode($_) } @$_ } else { utf8::decode($_) } } values %$hash;
228 my ($hash, @path) = @_;
231 while (my ($k, $v) = each %$hash)
233 if (ref $v eq 'HASH')
235 push @ret => iterate($v, @path, $k);
239 push @ret => join(".",@path, $k), $v;
248 my ($flat_hash) = @_;
250 while (my ($k, $v) = each %$flat_hash) {
251 #say "Inserting $k=$v";
252 insert_string_deep(\%new_hash, $k, $v);
258 # Fails under strict in certain cases:
259 ## Inserting browse.start_rjs.object_list.history.type.way=Vía [[id]]
260 ## Inserting activerecord.models.relation_tag=Etiqueta de la relación
261 ## Inserting browse.changeset_details.has_nodes.one=Tiene el siguiente {{count}} nodo:
262 ## Can't use string ("Tiene {{count}} nodos:") as a HASH ref while "strict refs" in use at script/locale/merge-from-translatewiki line 234.
263 # Line 234 = my $p = \$h; $p = \$$p->{$_} for split /\./, $ks;
265 # sub insert_string_deep_X {
266 # my ($h, $ks, $v) = @_;
267 # my $p = \$h; $p = \$$p->{$_} for split /\./, $ks;
271 sub insert_string_deep
273 my ($hash, $key, $value) = @_;
275 my @key = split /\./, $key;
294 # Get language from Translatewiki
297 sub get_translatewiki_translations
301 my $cache_file = "/tmp/merge-from-translatewiki.storable";
302 if (-f $cache_file) {
303 my $cache = retrieve($cache_file);
307 my %translatewiki_languages;
308 my $all_count = scalar @languages;
309 say "Translatewiki has $all_count languages I'm about to get";
311 for my $lang (@languages) {
313 say STDERR "Getting language $count/$all_count ($lang) from Translatewiki";
314 my $yaml = get_language_from_translatewiki($lang);
316 my $flat_data = load_and_flatten_yaml($yaml);
318 $translatewiki_languages{$lang} = $flat_data;
321 store \%translatewiki_languages, $cache_file;
323 return %translatewiki_languages;
326 sub get_language_from_translatewiki
329 my $mech = WWW::Mechanize->new;
331 $mech->get("http://translatewiki.net/w/i.php?title=Special%3ATranslate&task=export-to-file&group=out-osm&language=$lang");
333 die "Couldn't get lang $lang lang from Translatewiki" unless $mech->success;
335 return $mech->content;
342 sub translatewiki_languages
344 my $mech = WWW::Mechanize->new;
346 $mech->get('http://translatewiki.net/wiki/Translating:OpenStreetMap/stats/trunk');
348 die "Couldn't get translatewiki table" unless $mech->success;
350 my $content = $mech->content;
351 my ($sortable) = $content =~ m[(<table class="sortable.*</table>)]s;
353 my @table = parse_language_table($sortable);
356 map { $_->{code} => $_->{language} } @table;
359 sub parse_language_table
363 my $parser = HTML::TableParser::Grid->new($table);
365 for my $n (0 .. $parser->num_rows - 1) {
367 @row{qw(code language done fuzzy)} = $parser->row($n);
383 $name =~ s[\..*?$][];
388 sub load_and_flatten_yaml
392 my $data = Load($yaml);
394 # Remove the root $lang => key
395 my @keys = keys %$data;
396 die "YAML data had more than 1 root key" if @keys != 1;
397 $data = $data->{$keys[0]};
400 my $flat_data = { iterate($data) };
402 mark_utf8($flat_data);
415 Pod::Usage::pod2usage(
416 -verbose => $arg{ verbose },
417 -exitval => $arg{ exitval } || 0,