#!/usr/bin/perl -w

# Copyright (C) 2009 Tommaso Cucinotta
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# TODO
#   x Use &Randomize() function in &Quiz() to avoid
#     multiple asking the same word
#   x Check german translations in %strings
#   o Extract/show/save/train problem words
#   x Show current vs. global statistics
#
my (%dir_dict, %frw_dict, %rev_dict);
my (%stat_scores, %stat_total);
my (%stat_part_scores, %stat_part_total);

my $language = "eng";
my $pw_mode = 0;	# Problem Words mode
my $pw_treshold = 0.7;	# Success rate below which a word becomes a PW
my $smart_update = 1;	# Smart (weighted) update mode
my $update_weight = 0.66; # How much last test is weighted vs. previous
my $delay = 2;		# Delay for ShowDictionary (-s option)
my $debug_mode = 0;	# Debug mode

my ($dict_fname, $show_mode, $speak_mode, $clear_stats);

my %strings = (
    Error => {
	ger => "Nein: die richtig Antwort ist",
	eng => "Error: the right answer is"},
    Ok => {
	ger => "Richt",
	eng => "Right"},
    ActualScore => {
	ger => "Sein Teilskore ist",
	eng => "Your partial score is"},
    PreviousScore => {
	ger => "Sein skore war",
	eng => "Your score was"},
    GlobalScore => {
	ger => "Sein gesallt Skore ist",
	eng => "Your global score is"},
    ActualScorePW => {
	ger => "Sein Teilskore mit Problemeworten ist",
	eng => "Your partial score on problem words is"},
    PreviousScorePW => {
	ger => "Sein Skore mit Problemeworten war",
	eng => "Your score on problem words was"},
    GlobalScorePW => {
	ger => "Sein gesallt Skore mit Problemeworten ist",
	eng => "Your global score on problem words is"},
    StoringStats => {
	ger => "Statistik ist in",
	eng => "Storing stats in"},
    Retry => {
	ger => "Sie haben ein zweite prove",
	eng => "You have a second try"},
    Usage => {
	ger => "Verwendung",
	eng => "Usage"},
    PWFound => {
	ger => "problematisch Wort gefunden",
	eng => "problem words found"},
    NoPWFound => {
	ger => "Kein problematisch Wort gefunden",
	eng => "No problem words found"},
);

my %speak_langs = (
    eng => "en",
    ger => "de",
    ita => "it",
);

# Print a debug message on STDOUT if debug_mode is on.
sub DebugLog {
    print @_ if ($debug_mode);
}

# Return a message according to the current language
sub Msg {
    my ($key) = @_;
    return "" . $strings{$key}{$language};
}

# Randomize an array
sub Randomize {
    my @array = @_;
    my @result;
    DebugLog "Randomizing...\n";
    while ($#array != -1) {
	my $n = int(rand() * ($#array + 1));
	DebugLog "  " . $#array . ", $n\n";
	push @result, $array[$n];
	if ($n != $#array) {
	    $array[$n] = pop @array;
	} else {
	    pop @array;
	}
    }
    return @result;
}

# Remove any leading and trailing whitespaces
sub Reformat {
    my ($ref) = @_;
    $$ref =~ s/^\s*(.*)/$1/;
    while ($$ref =~ /^(.*)\s+$/) {
	$$ref = $1;
    }
}

sub GetArticle {
    my ($name) = @_;
    if ($name =~ /(der|die|das).*/) {
	return $1;
    }
}

sub ArticleToGenre {
    my ($name) = @_;
    if ($name eq "der") {
	return "n";
    } elsif ($name eq "die") {
	return "f";
    } elsif ($name eq "das") {
	return "n";
    }
}

sub DumpDictionary {
    foreach my $key (keys %dir_dict) {
	DebugLog "$key = "
	    . join(", ", keys %{$dir_dict{$key}}) . "\n";
    }
}

sub Delay {
    my ($ticks) = @_;
    while ($ticks > 0) {
	$| = 1;
	print ". ";
	sleep(1);
	$ticks--;
    }
}

sub ShowDictionary {
    my $i = 0;
    my @keys = keys %dir_dict;
    if ($pw_mode) {
	@keys = &SelectPW(@keys);
	if (! @keys) {
	    print &Msg("NoPWFound");
	    return;
	}
    }
    @keys = &Randomize(@keys);
    foreach my $key (@keys) {
	$| = 1;
	print "\n\n";
	printf "%3d", int($i * 100 / $#keys);
	print "% $key = ";
	&Delay($delay);
	print "" . join(", ", keys %{$dir_dict{$key}}) . "\n";
	sleep(1);
	$i++;
    }
    print "\n\n";
}

sub ShowDictionarySpeak {
    my $i = 0;
    my @keys = keys %dir_dict;
    if ($pw_mode) {
	@keys = &SelectPW(@keys);
	if (! @keys) {
	    print &Msg("NoPWFound");
	    SayMessage("NoPWFound");
	    return;
	}
    }
    @keys = &Randomize(@keys);
    foreach my $key (@keys) {
	$| = 1;
	print "\n\n";
	printf "%3d", int($i * 100 / $#keys);
	print "% $key = ";
	SayWord($key);
	&Delay($delay);
	my $str = join(", ", keys %{$dir_dict{$key}});
	print "" . $str . "\n";
	SayWordTranslation($key);
	sleep(1);
	$i++;
    }
    print "\n\n";
}

sub LoadDictionary {
    my ($fname) = @_;
    open(FD, "< $fname");
    while ($line = <FD>) {
	# Catch command directives: '# --option = value'
        #   --lang = <lang>
	if ($line =~ /\#[[:space:]]*--(.*)=(.*)/) {
	    my $opt = $1;
	    my $val = $2;
	    &Reformat(\$opt);
	    &Reformat(\$val);
	    $language = $val if ($opt eq "lang");
	} elsif ($line =~ /(.*)=(.*)/) {
	    my @ita_words = split(/, /, $1);
	    my @eng_words = split(/, /, $2);
	    foreach my $eng_word (@eng_words) {
		foreach my $ita_word (@ita_words) {
		    &Reformat(\$eng_word);
		    &Reformat(\$ita_word);
		    if ($eng_word && $ita_word) {
			$dir_dict{$eng_word}{$ita_word} = 1;
			$dir_dict{$ita_word}{$eng_word} = 1;
			$frw_dict{$ita_word}{$eng_word} = 1;
			$rev_dict{$eng_word}{$ita_word} = 1;
		    }
		}
	    }
	}
    }
    close(FD);
}

sub CreateStatistics {
    foreach my $key (keys %dir_dict) {
	$stat_scores{$key} = 0;
	$stat_total{$key} = 0;
	$stat_part_scores{$key} = 0;
	$stat_part_total{$key} = 0;
    }
#    foreach my $key (keys %rev_dict) {
#	$stat_scores{$key} = 0;
#	$stat_total{$key} = 0;
#    }
}

sub LoadStatistics {
    my ($fname) = @_;
    if (-f $fname) {
	open(FD, "< $fname");
	while ($line = <FD>) {
	    if ($line =~ /(.*)=(.*)\/(.*)/) {
		$stat_scores{$1} = $2;
		$stat_total{$1} = $3;
	    }
	}
	close(FD);
    }
}

sub WriteStatistics {
    my ($fname) = @_;
    open(FD, "> $fname");
    foreach my $key (keys %stat_scores) {
	print FD "$key=" . $stat_scores{$key} . "/" . $stat_total{$key} . "\n";
    }
    close(FD);
}

# my $score = &CalcStatistics(\%stat_scores, \%stat_total, keys %stat_scores);
sub CalcStatistics {
    my ($stat_scores, $stat_total, @keys) = @_;
    my $score = 0;
    my $total = 0;
    DebugLog "CalcStatistics:\n";
    foreach my $key (@keys) {
	DebugLog "  $key: " . $stat_scores->{$key} . "/"
	    . $stat_total->{$key} . "\n";
	$score += $stat_scores->{$key} if $stat_scores->{$key};
	$total += $stat_total->{$key} if $stat_total->{$key};
    }
    if ($total == 0) {
	$score = 0;
    } else {
	$score = int($score * 100 / $total);
    }
    DebugLog "  Global Score = $score\n";
    return $score;
}

sub UpdateStatistics {
    foreach my $key (keys %stat_part_scores) {
	my $ti = $stat_total{$key};		# t[i]
	my $ri = $stat_scores{$key};		# r[i]
	my $pti1 = $stat_part_total{$key};	# pt[i+1]
	my $pri1 = $stat_part_scores{$key};	# pr[i+1]
	my ($ti1, $ri1);			# t[i+1], r[i+1]
	my $uw = $update_weight;
	$ti1 = $ti + $pti1;
	if (! $smart_update) {
# Normal update:
	    $ri1 = $ri + $pri1;
	} else {
# Smart update: last test has a weight of $alpha, where all
# of the previous ones have a weight of (1-$alpha)
	    if ($pti1 == 0) {
		$ri1 = $ri;
	    } else {
		if ($ti == 0) {
# No previous tests, so the last gets full score (no weight)
		    $ri1 = $ti1 * $pri1/$pti1;
		} else {
		    $ri1 = $ti1 * ((1-$uw)*$ri/$ti + $uw*$pri1/$pti1);
		}
	    }
	}
	$stat_total{$key} = $ti1;
	$stat_scores{$key} = $ri1;
    }
}

sub DumpStatistics {
    my @keys = keys %dir_dict;
    my $pw_str = "";
    if ($pw_mode) {
	@keys = &SelectPW(@keys);
	$pw_str = "PW";
    }
    my $score = &CalcStatistics(\%stat_scores, \%stat_total, @keys);
    print "\n" . &Msg("PreviousScore" . $pw_str) . " $score%\n";
    $score = &CalcStatistics(\%stat_part_scores, \%stat_part_total, @keys);
    print "\n" . &Msg("ActualScore" . $pw_str) . " $score%\n";
    &UpdateStatistics();
    # Now the statistics after update are calculated on THE SAME KEYS
    # as before updating
    $score = &CalcStatistics(\%stat_scores, \%stat_total, @keys);
    print "\n" . &Msg("GlobalScore" . $pw_str) . " $score%\n";

    if ($pw_mode) {
	$score = &CalcStatistics(\%stat_scores, \%stat_total, keys %stat_scores);
	print "\n" . &Msg("GlobalScore") . " $score%\n";
    }
}

sub GetLine {
    my ($question) = @_;
    print "$question\n";
    my $answer = "" . <STDIN>;
    chop $answer;
    &Reformat(\$answer);
    return $answer;
}

sub Match {
    my ($a, $b) = @_;
    if (exists $dir_dict{$a}{$b}) {
	return 1;
    } else {
	return 0;
    }
}

sub SelectPW {
    my @keys = @_;
    # Eliminate good guessed keys, that is keys with success rate >= pw_treshold
    my $i = 0;
    while ($i <= $#keys) {
	my $key = $keys[$i];
	if (($stat_total{$key} > 0)
	    && ($stat_scores{$key} / $stat_total{$key} >= $pw_treshold)) {
	    splice(@keys, $i, 1);
	} else {
	    $i++;
	}
    }
    return @keys;
}

sub SayMessage {
    my ($msg) = @_;
    if ($speak_mode && exists $speak_langs{$language}) {
	my $cmd = "espeak -v " . $speak_langs{$language} . ' "' . &Msg($msg) . '"';
	my $out = `$cmd`;
    }
}

sub SayWord {
    my ($word) = @_;
    if ($speak_mode && exists $speak_langs{$language}) {
	if (exists $rev_dict{$word}) {
	    $speak_lang = $speak_langs{$language};
	} else {
	    $speak_lang = "it";
	}
	my $cmd = "espeak -v " . $speak_lang . ' "' . $word . '"';
	my $out = `$cmd`;
    }
}

sub SayWordTranslation {
    my ($word) = @_;
    my ($speak_lang, $str);
    if (exists $frw_dict{$word}) {
	$str = join(", ", keys %{$frw_dict{$word}});
	$speak_lang = $speak_langs{$language};
    } else {
	$str = join(", ", keys %{$rev_dict{$word}});
	$speak_lang = "it";
    }
    my $cmd = "espeak -v " . $speak_lang . ' "' . $str . '"';
    my $out = `$cmd`;
}

sub Quiz {
    my @keys = keys %dir_dict;
    if ($pw_mode) {
	# Eliminate good guessed keys
	@keys = &SelectPW(@keys);
	if (@keys) {
	    print "" . ($#keys + 1) . " " . &Msg("PWFound") . "\n";
	} else {
	    print &Msg("NoPWFound") . "\n";
	    return;
	}
    }
    @keys = &Randomize(@keys);
    my $num_keys = $#keys + 1;
    my $num = 1;
    foreach my $rnd_key (@keys) {
	my $question = sprintf("[%02d%%] -> $rnd_key ?", $num*100/$num_keys);
	SayWord($rnd_key);
	my $answer = &GetLine($question);
	if ("$answer" =~ /^(q|quit|exit|x)$/i) {
	    return;
	} elsif (&Match($rnd_key, $answer)) {
	    $stat_part_scores{$rnd_key}++;
	    print "\t\t" . &Msg("Ok") . "\n";
	    SayMessage("Ok");
	} else {
	    SayMessage("Retry");
	    SayWord($rnd_key);
	    $answer = &GetLine(&Msg("Retry") . " -> $rnd_key ?");
	    if (&Match($rnd_key, $answer)) {
		$stat_part_scores{$rnd_key}++;
		print "\t\t" . &Msg("Ok") . "\n";
		SayMessage("Ok");
	    } else {
		SayMessage("Error");
		SayWordTranslation($rnd_key);
		print "\t\t" . &Msg("Error") . " "
		    . join(", ", keys %{$dir_dict{$rnd_key}}) . "\n";
	    }
	}
	$stat_part_total{$rnd_key}++;
        $num++;
    }
}

sub Usage {
    print &Msg(Usage) . ": WordTeacher.pl [options] \<dict_fname\>\n";
    print "  Options:\n";
    print "      --help|-h             Show this help screen and exit\n";
    print "      --show|-s             Show only dictionary, no quiz\n";
    print "      --speak|-p            Enable use of espeak engine\n";
    print "      --delay|-d <delay>    Set show delay\n";
    print "      --clear-stats|-cs     Clear statistics\n";
    print "      -l <lang>             Set program language\n";
    print "      --problem-words|-pw   Train only problem words\n";
    print "      -pwt <treshold>       Set problem word treshold\n";
    print "      -uw <weight>          Set update weight (default = 0.66)\n";
    print "      --normal-update|-nu   Disable weighted stat update\n";
    print "      --debug-mode|-dm      Enable debug mode\n";
}

my $i = 0;

while ($i <= $#ARGV) {
    if (($ARGV[$i] eq "--help") || ($ARGV[$i] eq "-h")) {
	Usage();
	exit 0;
    } elsif (($ARGV[$i] eq "--show") || ($ARGV[$i] eq "-s")) {
	$show_mode = 1;
    } elsif (($ARGV[$i] eq "--speak") || ($ARGV[$i] eq "-p")) {
	$speak_mode = 1;
    } elsif (($ARGV[$i] eq "--debug-mode") || ($ARGV[$i] eq "-dm")) {
	$debug_mode = 1;
    } elsif (($ARGV[$i] eq "--problem-words") || ($ARGV[$i] eq "-pw")) {
	$pw_mode = 1;
    } elsif (($ARGV[$i] eq "--clear-stats") || ($ARGV[$i] eq "-cs")) {
	$clear_stats = 1;
    } elsif (($ARGV[$i] eq "--lang") || ($ARGV[$i] eq "-l")) {
	$i++;
	$language = $ARGV[$i];
    } elsif (($ARGV[$i] eq "--delay") || ($ARGV[$i] eq "-d")) {
	$i++;
	$delay = $ARGV[$i];
    } elsif ($ARGV[$i] eq "-pwt") {
	$i++;
	$pw_treshold = $ARGV[$i];
    } elsif (($ARGV[$i] eq "--update-weight") || ($ARGV[$i] eq "-uw")) {
	$i++;
	$update_weight = $ARGV[$i];
    } elsif (($ARGV[$i] eq "--normal-update") || ($ARGV[$i] eq "-nu")) {
	$smart_update = 0;
    } else {
	$dict_fname = $ARGV[$i];
    }
    $i++;
}

&LoadDictionary($dict_fname);

my $stat_fname = $dict_fname;
$stat_fname =~ s/\.dict/\.stat/;

print "" . &Msg("StoringStats") . " $stat_fname\n";

&DumpDictionary();
&CreateStatistics();
&LoadStatistics($stat_fname) if (! $clear_stats);

if ($show_mode) {
    &ShowDictionarySpeak();
    exit(0);
}

&Quiz();

&DumpStatistics();

&WriteStatistics($stat_fname);