X-Git-Url: https://git.openstreetmap.org./rails.git/blobdiff_plain/ca911e928ece17f3e697ef0cd970cc0f9b83a1ae..9aa029af7a3700730b8060125c6dd3dce625b5b7:/script/locale/diff?ds=sidebyside diff --git a/script/locale/diff b/script/locale/diff index 7ddfb3eb3..bfc63694d 100755 --- a/script/locale/diff +++ b/script/locale/diff @@ -1,7 +1,8 @@ #!/usr/bin/env perl +use feature ':5.10'; use strict; use warnings; -use YAML::Syck qw(Load LoadFile); +use YAML::Syck qw(Dump LoadFile); use Test::Differences; use Pod::Usage (); use Getopt::Long (); @@ -17,13 +18,16 @@ locale-diff - Compare two YAML files and print how their datastructures differ diff --keys en.yml is.yml # --untranslated-values compares prints keys whose values don't differ - diff --untranslated-values-all en.yml is.yml + diff --untranslated-values en.yml is.yml # --untranslated-values-all compares prints keys whose values # don't differ. Ignoring the blacklist which prunes things # unlikley to be translated diff --untranslated-values-all en.yml is.yml + # Check that interpolated variables ({{var}} and [[var]]) are the same + diff --validate-variables en.yml is.yml + =head1 DESCRIPTION This utility prints the differences between two YAML files using @@ -46,10 +50,12 @@ new entries from F to a local file. =item --untranslated-values -Show keys whose values are either exactly the same between the two -files, or don't exist in the target file (the latter file -specified). The values are pruned according to global and language -specific blacklists found in the C<__DATA__> section of this script. +Show keys that B and whose values +are exactly the same. Use C<--keys> to a list of values that hasn't +been merged. + +The values are pruned according to global and language specific +blacklists found in the C<__DATA__> section of this script. This helps to find untranslated values. @@ -57,11 +63,21 @@ This helps to find untranslated values. Like C<--untranslated-values> but ignores blacklists. +=item --validate-variables + +Check that interpolated Ruby i18n variables (C<{{foo}}> and +C<[[foo]]>) are equivalent in the two provided files. + +=item --dump-flat + +Dump a flat version of the translation hash in YAML format, +i.e. "foo.bar" instead of "{foo}->{bar}". + =back =head1 AUTHOR -Evar ArnfjErE Bjarmason +Evar ArnfjErE Bjarmason =cut @@ -71,31 +87,55 @@ Getopt::Long::Parser->new( )->getoptions( 'h|help' => \my $help, 'keys' => \my $keys, + 'dump-flat' => \my $dump_flat, 'untranslated-values' => \my $untranslated_values, 'untranslated-values-all' => \my $untranslated_values_all, + 'validate-variables' => \my $validate_variables, + 'reconstruct' => \my $reconstruct, ) or help(); # --keys is the default -$keys = 1 if not $untranslated_values_all and not $untranslated_values; +$keys = 1 if not $untranslated_values_all and not $untranslated_values and not $validate_variables and not $dump_flat; # On --help help() if $help; # If we're not given two .yml files -help() if @ARGV != 2 or (!-f $ARGV[0] or !-f $ARGV[1]); +help() if (@ARGV != 2 or (!-f $ARGV[0] or !-f $ARGV[1])) and not $dump_flat || $reconstruct; my ($from, $to) = @ARGV; my $from_data = LoadFile($from); -my $to_data = LoadFile($to); - my $from_parsed = { iterate($from_data->{basename($from)}) }; + +if ($dump_flat) +{ + mark_utf8($from_parsed); + + print Dump $from_parsed; + + exit 0; +} + +if ($reconstruct) { + mark_utf8($from_parsed); + + my %out; + while (my ($k, $v) = each %$from_parsed) { + insert_string_deep(\%out, $k, $v); + } + + print Dump { basename($from) => \%out }; + + exit 0; +} + +my $to_data = LoadFile($to); my $to_parsed = { iterate($to_data->{basename($to)}) }; -# Since this used to be the default, support that... if ($keys) { - print_key_differences(); + print_key_differences($from_parsed, $to_parsed); } elsif ($untranslated_values or $untranslated_values_all) { @@ -106,25 +146,30 @@ elsif ($untranslated_values or $untranslated_values_all) @untranslated = prune_untranslated_with_blacklist(basename($to), @untranslated); } - print $_, "\n" for @untranslated; + say for @untranslated; +} elsif ($validate_variables) +{ + print_validate_variables($from_parsed, $to_parsed); } exit 0; sub print_key_differences { + my ($f, $t) = @_; + # Hack around Test::Differences wanting a Test::* module loaded $INC{"Test.pm"} = 1; sub Test::ok { print shift } # Diff the tree - eq_or_diff([ sort keys %$from_parsed ], [ sort keys %$to_parsed ]); + eq_or_diff([ sort keys %$f ], [ sort keys %$t ]); } sub untranslated_keys { my ($from_parsed, $to_parsed) = @_; - grep { not exists $to_parsed->{$_} or $from_parsed->{$_} eq $to_parsed->{$_} } keys %$from_parsed; + sort grep { exists $to_parsed->{$_} and $from_parsed->{$_} eq $to_parsed->{$_} } keys %$from_parsed; } sub prune_untranslated_with_blacklist @@ -133,17 +178,13 @@ sub prune_untranslated_with_blacklist my %keys; @keys{@keys} = (); - my $end_yaml = Load(join '', ); + my $end_yaml = LoadFile(*DATA); my $untranslated_values = $end_yaml->{untranslated_values}; my $default = $untranslated_values->{default}; my $this_language = $untranslated_values->{$language} || {}; my %bw_list = (%$default, %$this_language); - use feature ':5.10'; - use Data::Dump 'dump'; - say STDERR dump \%bw_list; - while (my ($key, $blacklisted) = each %bw_list) { # FIXME: Does syck actually support true/false booleans in yaml? @@ -153,6 +194,38 @@ sub prune_untranslated_with_blacklist sort keys %keys; } +sub print_validate_variables +{ + my ($f, $t) = @_; + + while (my ($key, $val) = each %$f) + { + next if exists $f->{$key} and not exists $t->{$key}; + + my @from_var = parse_variables_from_string($f->{$key}); + my @to_var = parse_variables_from_string($t->{$key}); + + unless (@from_var ~~ @to_var) { + say "$key in $from has (@from_var) and $to has (@to_var)"; + } + + } +} + +sub parse_variables_from_string +{ + my ($string) = @_; + + # This probably matches most of the variables + my $var = qr/ [a-z0-9_]+? /xs; + + if (my @var = $string =~ m/ \{\{ ($var) \}\} | \[\[ ($var) \]\] /gsx) { + return sort grep { defined } @var; + } else { + return; + } +} + sub iterate { my ($hash, @path) = @_; @@ -173,13 +246,52 @@ sub iterate return @ret; } +# $s = 'foo.bar.baz.spam.eggs.ham'; $h = \%h; $h = $h->{$_} = {} for split /\./, $s; \%h +# ==> {foo => {bar => {baz => {spam => {eggs => {ham => {}}}}}}} +sub insert_string_deep { + my ($h, $ks, $v) = @_; + my $p = \$h; $p = \$$p->{$_} for split /\./, $ks; + $$p = $v; +} + +# sub insert_string_deep +# { +# my ($hash, $key, $value) = @_; +# +# my @key = split /\./, $key; +# my $h = $hash; +# +# my $i = 0; +# for my $k (@key) { +# $i ++; +# if ($i == @key) { +# $h->{$k} = $value; +# } else { +# if (ref $h->{$k}) { +# $h = $h->{$k}; +# } else { +# $h = $h->{$k} = {}; +# } +# } +# } +# } + sub basename { my $name = shift; $name =~ s[\..*?$][]; + $name =~ s[.*/][]; $name; } +sub mark_utf8 +{ + my ($hash) = @_; + + # Mark as UTF-8 + map { if (ref $_ eq 'ARRAY') { map { utf8::decode($_) } @$_ } else { utf8::decode($_) } } values %$hash; +} + sub help { my %arg = @_; @@ -203,6 +315,68 @@ untranslated_values: html.dir: true layouts.intro_3_bytemark: true layouts.intro_3_ucl: true + layouts.project_name.h1: true + layouts.project_name.title: true + site.index.license.project_url: true + browse.relation_member.entry: true + + # #{{id}} + changeset.changeset.id: true + de: + activerecord.attributes.message.sender: true + activerecord.attributes.trace.name: true + activerecord.models.changeset: true + activerecord.models.relation: true + browse.changeset.changeset: true + browse.changeset.changesetxml: true + browse.changeset.osmchangexml: true + browse.changeset.title: true + browse.common_details.version: true + browse.containing_relation.relation: true + browse.relation.relation: true + browse.relation.relation_title: true + browse.start_rjs.details: true + browse.start_rjs.object_list.details: true + browse.tag_details.tags: true + changeset.changesets.id: true + export.start.export_button: true + export.start.format: true + export.start.output: true + export.start.zoom: true + export.start_rjs.export: true layouts.export: true + layouts.shop: true + site.edit.anon_edits: true + site.index.license.license_name: true + site.index.permalink: true + site.key.table.entry.park: true + site.search.submit_text: true + trace.edit.tags: true + trace.trace.in: true + trace.trace_form.tags: true + trace.trace_optionals.tags: true + trace.view.tags: true + user.account.public editing.enabled link: true + + is: + # ({{link}}) + site.edit.anon_edits: true + + # Creative Commons Attribution-Share Alike 2.0 + site.index.license.license_name: true + + # http://creativecommons.org/licenses/by-sa/2.0/ + site.index.license.license_url: true + + # {{id}} + printable_name.with_id: true + + # {{name}} ({{id}}) + printable_name.with_name: true + + # {{type}} + geocoder.search_osm_namefinder.prefix: true + # {{suffix}}, {{parentname}} + geocoder.search_osm_namefinder.suffix_suburb: true