#!/usr/bin/env perl
+use feature ':5.10';
use strict;
use warnings;
-use YAML::Syck qw(LoadFile);
+use YAML::Syck qw(Dump LoadFile);
use Test::Differences;
use Pod::Usage ();
use Getopt::Long ();
+use File::Basename qw(fileparse);
=head1 NAME
=head1 SYNOPSIS
- diff en.yml is.yml
# --keys is the default
+ diff en.yml is.yml
diff --keys en.yml is.yml
+
# --untranslated-values compares prints keys whose values don't differ
diff --untranslated-values en.yml is.yml
+ # --untranslated-values-all compares prints keys whose values
+ # don't differ. Ignoring the blacklist which prunes things
+ # unlikley to be translated
+ diff --untranslated-values-all en.yml is.yml
+
+ # Check that interpolated variables ({{var}} and [[var]]) are the same
+ diff --validate-variables en.yml is.yml
+
=head1 DESCRIPTION
This utility prints the differences between two YAML files using
=item --untranslated-values
-Show keys whose values are either exactly the same between the two
-files, or don't exist in the target file (the latter file specified).
+Show keys that B<exist in both the compared files> and whose values
+are exactly the same. Use C<--keys> to a list of values that hasn't
+been merged.
+
+The values are pruned according to global and language specific
+blacklists found in the C<__DATA__> section of this script.
This helps to find untranslated values.
+=item --untranslated-values-all
+
+Like C<--untranslated-values> but ignores blacklists.
+
+=item --validate-variables
+
+Check that interpolated Ruby i18n variables (C<{{foo}}> and
+C<[[foo]]>) are equivalent in the two provided files.
+
+=item --dump-flat
+
+Dump a flat version of the translation hash in YAML format,
+i.e. "foo.bar" instead of "{foo}->{bar}".
+
=back
=head1 AUTHOR
-E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@f-prot.com>
+E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avarab@gmail.com>
=cut
)->getoptions(
'h|help' => \my $help,
'keys' => \my $keys,
+ 'dump-flat' => \my $dump_flat,
'untranslated-values' => \my $untranslated_values,
+ 'untranslated-values-all' => \my $untranslated_values_all,
+ 'validate-variables' => \my $validate_variables,
) or help();
+# --keys is the default
+$keys = 1 if not $untranslated_values_all and not $untranslated_values and not $validate_variables and not $dump_flat;
+
# On --help
help() if $help;
# If we're not given two .yml files
-help() if @ARGV != 2 or (!-f $ARGV[0] or !-f $ARGV[1]);
+help() if (@ARGV != 2 or (!-f $ARGV[0] or !-f $ARGV[1])) and not $dump_flat;
my ($from, $to) = @ARGV;
my $from_data = LoadFile($from);
-my $to_data = LoadFile($to);
+my $from_parsed = { iterate($from_data->{fileparse($from, qr/\.[^.]*/)}) };
+
+if ($dump_flat)
+{
+ # Mark as UTF-8
+ map { if (ref $_ eq 'ARRAY') { map { utf8::decode($_) } @$_ } else { utf8::decode($_) } } values %$from_parsed;
+
+ print Dump $from_parsed;
+
+ exit 0;
+}
-my $from_parsed = { iterate($from_data->{basename($from)}) };
-my $to_parsed = { iterate($to_data->{basename($to)}) };
+my $to_data = LoadFile($to);
+my $to_parsed = { iterate($to_data->{fileparse($to, qr/\.[^.]*/)}) };
-# Since this used to be the default, support that...
-if ((not $untranslated_values and not $keys) or $keys)
+if ($keys)
{
- print_key_differences();
+ print_key_differences($from_parsed, $to_parsed);
}
-elsif ($untranslated_values)
+elsif ($untranslated_values or $untranslated_values_all)
{
my @untranslated = untranslated_keys($from_parsed, $to_parsed);
- print $_, "\n" for @untranslated;
+ # Prune according to blacklist
+ if ($untranslated_values) {
+ @untranslated = prune_untranslated_with_blacklist(scalar(fileparse($to, qr/\.[^.]*/)), @untranslated);
+ }
+
+ say for @untranslated;
+} elsif ($validate_variables)
+{
+ print_validate_variables($from_parsed, $to_parsed);
}
exit 0;
sub print_key_differences
{
+ my ($f, $t) = @_;
+
# Hack around Test::Differences wanting a Test::* module loaded
$INC{"Test.pm"} = 1;
sub Test::ok { print shift }
# Diff the tree
- eq_or_diff([ sort keys %$from_parsed ], [ sort keys %$to_parsed ]);
+ eq_or_diff([ sort keys %$f ], [ sort keys %$t ]);
}
sub untranslated_keys
{
my ($from_parsed, $to_parsed) = @_;
- sort grep { not exists $to_parsed->{$_} or $from_parsed->{$_} eq $to_parsed->{$_} } keys %$from_parsed;
+ sort grep { exists $to_parsed->{$_} and $from_parsed->{$_} eq $to_parsed->{$_} } keys %$from_parsed;
+}
+
+sub prune_untranslated_with_blacklist
+{
+ my ($language, @keys) = @_;
+ my %keys;
+ @keys{@keys} = ();
+
+ my $end_yaml = LoadFile(*DATA);
+ my $untranslated_values = $end_yaml->{untranslated_values};
+ my $default = $untranslated_values->{default};
+ my $this_language = $untranslated_values->{$language} || {};
+
+ my %bw_list = (%$default, %$this_language);
+
+ while (my ($key, $blacklisted) = each %bw_list)
+ {
+ # FIXME: Does syck actually support true/false booleans in yaml?
+ delete $keys{$key} if $blacklisted eq 'true'
+ }
+
+ sort keys %keys;
+}
+
+sub print_validate_variables
+{
+ my ($f, $t) = @_;
+
+ while (my ($key, $val) = each %$f)
+ {
+ next if exists $f->{$key} and not exists $t->{$key};
+
+ my @from_var = parse_variables_from_string($f->{$key});
+ my @to_var = parse_variables_from_string($t->{$key});
+
+ unless (@from_var ~~ @to_var) {
+ say "$key in $from has (@from_var) and $to has (@to_var)";
+ }
+
+ }
+}
+
+sub parse_variables_from_string
+{
+ my ($string) = @_;
+
+ # This probably matches most of the variables
+ my $var = qr/ [a-z0-9_]+? /xs;
+
+ if (my @var = $string =~ m/ \{\{ ($var) \}\} | \[\[ ($var) \]\] /gsx) {
+ return sort grep { defined } @var;
+ } else {
+ return;
+ }
}
sub iterate
return @ret;
}
-sub basename
-{
- my $name = shift;
- $name =~ s[\..*?$][];
- $name;
-}
-
sub help
{
my %arg = @_;
-exitval => $arg{ exitval } || 0,
);
}
+
+__DATA__
+untranslated_values:
+
+ # Default/Per language blacklist/whitelist for the
+ # --untranslated-values switch. "true" as a value indicates that the
+ # key is to be blacklisted, and "false" that it's to be
+ # whitelisted. "false" is only required to whitelist a key
+ # blacklisted by default on a per-language basis.
+
+ default:
+ html.dir: true
+ layouts.intro_3_bytemark: true
+ layouts.intro_3_ucl: true
+ layouts.project_name.h1: true
+ layouts.project_name.title: true
+ site.index.license.project_url: true
+ browse.relation_member.entry: true
+
+ # #{{id}}
+ changeset.changeset.id: true
+
+ de:
+ activerecord.attributes.message.sender: true
+ activerecord.attributes.trace.name: true
+ activerecord.models.changeset: true
+ activerecord.models.relation: true
+ browse.changeset.changeset: true
+ browse.changeset.changesetxml: true
+ browse.changeset.osmchangexml: true
+ browse.changeset.title: true
+ browse.common_details.version: true
+ browse.containing_relation.relation: true
+ browse.relation.relation: true
+ browse.relation.relation_title: true
+ browse.start_rjs.details: true
+ browse.start_rjs.object_list.details: true
+ browse.tag_details.tags: true
+ changeset.changesets.id: true
+ export.start.export_button: true
+ export.start.format: true
+ export.start.output: true
+ export.start.zoom: true
+ export.start_rjs.export: true
+ layouts.export: true
+ layouts.shop: true
+ site.edit.anon_edits: true
+ site.index.license.license_name: true
+ site.index.permalink: true
+ site.key.table.entry.park: true
+ site.search.submit_text: true
+ trace.edit.tags: true
+ trace.trace.in: true
+ trace.trace_form.tags: true
+ trace.trace_optionals.tags: true
+ trace.view.tags: true
+ user.account.public editing.enabled link: true
+
+ is:
+ # ({{link}})
+ site.edit.anon_edits: true
+
+ # Creative Commons Attribution-Share Alike 2.0
+ site.index.license.license_name: true
+
+ # http://creativecommons.org/licenses/by-sa/2.0/
+ site.index.license.license_url: true
+
+ # {{id}}
+ printable_name.with_id: true
+
+ # {{name}} ({{id}})
+ printable_name.with_name: true
+
+ # {{type}}
+ geocoder.search_osm_namefinder.prefix: true
+
+ # {{suffix}}, {{parentname}}
+ geocoder.search_osm_namefinder.suffix_suburb: true