use warnings;
use strict;
use Data::Dumper; 

########################################
my $default_lines_per_page = 46;
my $default_characters_per_line = 72; # 
my $default_tab_stop = 3;
my $default_output_text = 0;
########################################

# Print usage and exit when the script is called with no arguments or
# when the first argument is a help switch.  Anything beginning with
# "-h" or "--h" counts as a help request ("-h", "-help", "--help", ...);
# previously "--help" was mistaken for a file name.
if ( !@ARGV or $ARGV[0] =~ m{\A--?h} ) {
	print <<"HELP";

Call this script, passing in the name of a text file
containing your story in plaintext format. This script
will then read it, and generate a publisher-friendly
word count (note that this is a different sort of 
word count than is reported by normal word processors.)

Example:

	perl wordcount.pl mystory.txt

By default, the program will assume 
$default_lines_per_page lines per page and 
$default_characters_per_line characters per line and
$default_tab_stop characters per tab.

If you want to change any of these values, 
add them on the command line after the filename.
For example:

	perl wordcount.pl mystory.txt -lines=23 -chars=60 -tabs=5

To also print the story re-wrapped to the chosen
line width, add -text=1. For example:

	perl wordcount.pl mystory.txt -text=1

If you want to change the defaults, edit the 
first few lines of your copy of the program.

Copyright 2006 Greg London
This program licensed under the 
CreativeCommons-Attribution license.
http://creativecommons.org/licenses/by/2.5/

HELP

	exit;
}

###########################################################
# process arguments and check for errors
###########################################################
# The first argument is the story file; everything after it is an option.
my $storyname = shift(@ARGV);

unless (defined($storyname)) {
	die "Error: please provide text filename";
}

# Effective settings: start from the defaults, then apply any
# -name=number overrides given on the command line.
my %actual_values = (
	lines => $default_lines_per_page,
	chars => $default_characters_per_line,
	tabs  => $default_tab_stop,
	text  => $default_output_text,
);

while (defined(my $arg = shift(@ARGV))) {

	# An option must look exactly like -name=number.  Anything else is
	# rejected instead of being silently skipped, so a typo cannot
	# quietly leave a default in effect.
	my ($key, $val) = $arg =~ m{\A-(\w+)=(\d+)\z}
		or die "Error: malformed argument '$arg' (expected -name=number)";

	unless (exists($actual_values{$key})) {
		die "Error: unknown argument '$arg'";
	}

	$actual_values{$key} = $val;
}

#print Dumper \%actual_values;

# The page total is computed by dividing by the lines-per-page value,
# so zero would abort with "Illegal division by zero".
if ($actual_values{lines} < 1) {
	die "Error: -lines must be at least 1";
}

unless (-e $storyname) {
	die "Error: could not find file '$storyname'";
}



###########################################################
# only print out if -text=1
###########################################################
sub pprint {
	# Echo the given list to standard output, but only when the
	# "text" setting in %actual_values is true; otherwise print
	# nothing.
	print @_ if $actual_values{text};
}

###########################################################
#process file
###########################################################

# Read the story, cut each input line into chunks (tab, newline, single
# whitespace character, word with attached punctuation, or punctuation
# run) and lay the chunks out on output lines of at most
# $actual_values{chars} columns.  $linecounter ends up holding the number
# of output lines produced; the laid-out text itself is only echoed when
# the "text" setting is on (see pprint).

# Three-argument open: the file name is always taken literally, so a name
# starting with '>' or ending in '|' cannot change the open mode.
open(my $in, '<', $storyname)
	or die "Error: unable to open $storyname: $!";

my $linecounter = 0;     # output lines completed so far
my $columncounter = 0;   # width already used on the current output line

my $chunk='';

while (my $linetext = <$in>) {

	# Consume the input line one chunk at a time from the front.
	while (length($linetext)) {

		# A tab is laid out as a fixed number of spaces.
		if ($linetext =~ s{\A(\t)}{}) {
			$chunk = ' ' x $actual_values{tabs};
			$columncounter += $actual_values{tabs};

		# A newline always ends the current output line: push the column
		# past the limit so the wrap logic below fires.
		} elsif ($linetext =~ s{\A(\n)}{}) {
			$chunk = $1;
			$columncounter = $actual_values{chars} + 10;

		# Any other single whitespace character takes one column.
		} elsif ($linetext =~ s{\A(\s)}{}) {
			$chunk = $1;
			$columncounter += 1;

		# Unbreakable text, tried in this order: a word together with
		# whatever non-whitespace is attached to it, a bare word, or a
		# run of punctuation.
		} elsif ($linetext =~ s{\A( \w+\S+ | \w+ | [^\s\w]+ )}{}x) {
			$chunk = $1;
			$columncounter += length($chunk);

		} else {
			die "Parse Error: no match on remaining text, '$linetext'";
		}

		# Did this chunk take us past the end of the output line?
		if ($columncounter > $actual_values{chars}) {

			# Every line break completes exactly one output line.  Input
			# lines are deliberately NOT counted as they are read: each
			# newline already lands here, so counting both would count
			# every newline-terminated line twice.
			pprint("\n");
			$linecounter++;

			if ($chunk =~ m{\A\s}) {
				# Whitespace at the break (including the newline itself)
				# is dropped; the next output line starts empty.
				$columncounter = 0;
			} else {
				# Non-whitespace moves down to start the next output line.
				pprint($chunk);
				$columncounter = length($chunk);
			}

		} else {

			# Still fits on the current output line.
			pprint($chunk);
		}

	} # while (linetext)
} # while (in)

# Text left on the last output line without a line break after it
# (for example a file with no trailing newline) is one more line.
$linecounter++ if $columncounter > 0;

close ($in) or warn "Warning: problem closing '$storyname', I hope it's OK.";

# Page total = line count / lines per page, truncated (not rounded)
# to one decimal place by going through whole tenths of a page.
my $total_pages_float = $linecounter / $actual_values{lines};
my $total_pages_int   = int($total_pages_float * 10);
my $total_pages       = $total_pages_int / 10;

# Final report, framed above and below by a 40-character rule of '#'.
my $rule = "#" x 40;

print "\n\n\n$rule\n",
	"total lines for this text is $linecounter\n",
	"With a lines per page of $actual_values{lines}\n",
	"that yields a total of $total_pages pages\n",
	"$rule\n\n\n";