#! /usr/bin/perl -w

# Copyright (C) 2008 Paul Kuliniewicz <paul@kuliniewicz.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA.

use strict;

# Smallest and largest font sizes the cloud may use for a word.
my ($FONT_MIN, $FONT_MAX) = (10, 60);

# Split the input into words and tally them up, ignoring case
# and punctuation.

# Return the normalized words found in one line of text.
#
# Tokens are separated by whitespace or "--".  Each token is trimmed to
# the span running from its first to its last word character, so inner
# punctuation survives ("don't") while surrounding punctuation is dropped
# ("(hello)," becomes "hello"), and the result is lowercased.  Tokens
# that contain no word characters at all are skipped.
sub words_in
{
	my ($line) = @_;
	my @words = ();

	# A non-capturing pattern keeps the separators themselves out of
	# the list that split returns.
	foreach my $token (split /\s+|--/, $line)
	{
		# The tail after the first word character is optional so that
		# one-character words such as "a" or "I" are counted too.
		push @words, lc $1 if $token =~ /(\w(?:.*\w)?)/;
	}

	return @words;
}

# Maps each lowercased word to the number of times it was seen.
my %counts = ();
while (my $line = <>)
{
	++$counts{$_} foreach words_in ($line);
}

# Find the highest tally among all words; it stays 0 when nothing
# was counted.

my $max = 0;
for my $tally (values %counts)
{
	if ($tally > $max)
	{
		$max = $tally;
	}
}

# Emit the fixed opening of the HTML page, up to and including the
# start of <body>.  The text contains nothing to interpolate, so a
# single-quoted heredoc is used.

print <<'PAGE_HEAD';
<html>
	<head>
		<title>Word Cloud</title>
	</head>
	<body>

PAGE_HEAD

# Escape the characters that are special in HTML element content so
# that a word taken from the input cannot be parsed as markup.
sub escape_html
{
	my ($text) = @_;
	$text =~ s/&/&amp;/g;	# must come first, or it would re-escape the others
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	return $text;
}

# Emit one <span> per word, in sorted order, sized on a logarithmic
# scale: a word used once gets $FONT_MIN and the most frequent word
# gets (up to truncation) $FONT_MAX.
#
# log(1) is 0 and log(0) is a fatal error, so the scale factor can only
# be computed when some word occurs more than once.  Otherwise every
# word (if there are any) has a count of 1 and a scale of 0 puts them
# all at $FONT_MIN.
my $scale = $max > 1 ? ($FONT_MAX - $FONT_MIN) / log ($max) : 0;
foreach my $word (sort keys %counts)
{
	my $count = $counts{$word};
	my $size = $FONT_MIN + int (log ($count) * $scale);
	my $uses = $count == 1 ? "1 use" : "$count uses";
	my $label = escape_html ($word);

	# An explicit unit is required for the size to be valid CSS; "px"
	# matches how browsers treat a unitless size in quirks mode.
	print "<span style='font-size: ${size}px' title='$uses'>$label</span>\n";
}

# Emit the closing tags for <body> and <html>, then finish with a
# success status.
print <<'PAGE_FOOT';

	</body>
</html>
PAGE_FOOT

exit 0;
