#!/usr/bin/perl

use warnings;
use strict;

use DBI;

# Column list "a, b, ..., z". No active statement uses it; only the
# commented-out SQLite variant of the CREATE TABLE statement refers to it.
my $a_z = join ", ", "a".."z";
# One "?" placeholder per letter column, used by the INSERT statement.
my $xxx = join ", ", ("?") x 26;

#my $dbh = DBI->connect("dbi:SQLite:dbname=9live.db");
# PrintError is off, so DBI does not report failed statements on its own.
# connect() returns undef on failure; stop right here with the driver's
# message instead of failing later on a method call on an undefined handle.
my $dbh = DBI->connect("dbi:mysql:9live;host=localhost", "root", "", { PrintError => 0 })
  or die "Cannot connect to database: $DBI::errstr\n";
# Two modes, selected by the command line:
#   * no arguments  - build mode: create the `words` table and fill it from
#                     the word list read via <>
#   * WORD [MINUS]  - search mode: look up words by the letter histogram of
#                     WORD (optionally minus the letters of MINUS)
if (!@ARGV) {
  # The result of this statement is deliberately not checked: with
  # PrintError off, a failure goes unnoticed and loading simply carries on.
  #$dbh->do("CREATE TABLE words (word, $a_z)");
  $dbh->do(qq(
    CREATE TABLE words (
      word VARCHAR(255) UNIQUE,
      @{[
        join ", ", map { "$_ TINYINT(1)" } "a".."z"
      ]}
    )
  ));

  my $sth = $dbh->prepare(qq(
    INSERT INTO words
    VALUES      (?, $xxx)
  )) or die "Cannot prepare INSERT: " . $dbh->errstr . "\n";

  my $start = time;
  my $i     = 0;
  my $dups  = 0;

  # Prints throughput and duplicate statistics to STDERR. A run shorter than
  # one second is treated as one second, and an empty input as 0 % duplicates,
  # so the summary can never divide by zero.
  my $report = sub {
    my $elapsed = (time - $start) || 1;
    my $percent = $i ? $dups / $i * 100 : 0;
    printf STDERR "%d words/s, %d dups (%.1f %%)\n", $i / $elapsed, $dups, $percent;
  };

  while (my $word = <>) {
    chomp $word;
    $word = uc $word;
    next unless $word =~ /^[A-Z]+$/;   # skip anything that is not purely letters

    # Every failed INSERT is counted as a duplicate; the actual error is not
    # inspected.
    $sth->execute($word => hist($word)) or $dups++;

    $i++;
    $report->() if $i % 1000 == 0 and time - $start > 0;
  }
  $report->();

} else {
  my @occ;
  local $" = "";   # histograms are printed as a run of digits, no separators
  if ($ARGV[1]) {
    # WORD minus MINUS: subtract the second histogram from the first.
    my @minus = hist(uc $ARGV[1]);
    @occ = hist(uc $ARGV[0]);
    $occ[$_] -= $minus[$_] for 0..$#minus;
    for my $count (@occ) {
      die "< 0!" if $count < 0;   # MINUS uses a letter more often than WORD
    }
    print "Suche nach $ARGV[0]-$ARGV[1] [@occ]...\n";
  } else {
    @occ = hist(uc $ARGV[0]);
    print "Suche nach $ARGV[0] [@occ]...\n";
  }

  # Each search compares all 26 letter columns against the histogram with a
  # single operator:
  #   =   words with exactly these letter counts
  #   <=  words that need no more of any letter than the histogram has
  #   >=  words that have at least the histogram's count of every letter
  my @letters  = ("a" .. "z");
  my @searches = (
    [ "Exakt",           "="  ],
    [ "Enthaltend",      "<=" ],
    [ "Anti-Enthaltend", ">=" ],
  );

  for my $n (0 .. $#searches) {
    my ($label, $op) = @{ $searches[$n] };

    print "\n" if $n;   # blank line between result sections, none at the end
    print "$label:\n";

    # The interpolated values are integer counts produced by hist().
    my $where = join " AND ", map { "$letters[$_] $op '$occ[$_]'" } 0 .. 25;
    query(qq(
      SELECT word
      FROM   words
      WHERE  $where
    ));
  }
}

# Builds the letter histogram of a word.
#
# Takes one string and returns a list of exactly 26 counts: element 0 is the
# number of "A"s, element 25 the number of "Z"s. Only the upper-case ASCII
# letters A-Z are counted. Every other character (lower-case letters, digits,
# whitespace, ...) is ignored, so it can neither bump an unrelated slot
# through a negative index nor grow the list beyond 26 entries.
sub hist {
  my ($word) = @_;

  my @occ = (0) x 26;
  for my $char (split //, $word) {
    next unless $char =~ /\A[A-Z]\z/;
    $occ[ord($char) - 65]++;   # 65 == ord("A")
  }

  return @occ;
}

# Runs the SQL statement given as the only argument and prints the first
# column of every result row on its own line, longest values first and in
# string order among values of equal length.
#
# Dies with the database error message when the statement fails. Without the
# check, the undef returned by selectall_arrayref would only surface as an
# opaque "Can't use an undefined value as an ARRAY reference".
sub query {
  my ($sql) = @_;

  my $rows = $dbh->selectall_arrayref($sql)
    or die "Query failed: " . $dbh->errstr . "\n";

  my @words = sort { (length($b) <=> length($a)) || ($a cmp $b) }
              map  { $_->[0] }
              @$rows;

  print "$_\n" for @words;
  return;
}
