#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell

use lib '.';

#
# grutatxt - Text to HTML (and other formats) converter
# http://www.triptico.com/software/grutatxt.html
#
# Copyright (C) 2000/2008 Angel Ortega <angel@triptico.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
# http://www.triptico.com
#

use Getopt::Long;
use locale;
use Grutatxt;

# program version: the Grutatxt module version plus this script's revision
$VERSION = join(':', $Grutatxt::VERSION, '1');

# input and output files ('-' and '>-' stand for STDIN and STDOUT)
($input_file, $output_file) = ('-', '>-');

# CSS information: location, and whether to embed it in the page
($css, $embed_css) = ('', 0);

# page title
$title = '';

# default output mode, and offset for the h? headers
($mode, $header_offset) = ('HTML', 0);

# use real dl in definition lists
$dl_as_dl = 0;

# troff table type, and man page section
($table_type, $man_section) = ('allbox', 1);

# abstract line number
$abstract = 0;

# default tab size in LaTeX mode
$tabsize = 8;

# switches: avoid the time signature / disable pure verbatim mode
($no_time_sig, $no_pure_verbatim) = (0, 0);

#####################################################################

# Parse the command line options. Every option shown by usage() has to
# be registered here: Getopt::Long rejects anything it does not know,
# which sends the user straight to the help text.
#
# -sd|--strip-dollars and --table-type are registered so that the
# values handed to the Grutatxt constructor ('strip-dollars' and
# 'table-type') can actually be set from the command line.
if (!GetOptions('i|input=s'		=>	\$input_file,
		'o|output=s'		=>	\$output_file,
		'c|css=s'		=>	\$css,
		'e|embed-css'		=>	\$embed_css,
		't|title=s'		=>	\$title,
		'f|header-offset=s'	=>	\$header_offset,
		'b|table-headers'	=>	\$table_headers,
		'ct|center-tables'	=>	\$center_tables,
		'xt|expand-tables'	=>	\$expand_tables,
		'sp|strip-parens'	=>	\$strip_parens,
		'sd|strip-dollars'	=>	\$strip_dollars,
		'ts|tabsize=s'		=>	\$tabsize,
		'nb|no-body'		=>	\$no_body,
		'v|version'		=>	\$version,
		'h|help'		=>	\$usage,
		'm|mode=s'		=>	\$mode,
		's|man-section=s'	=>	\$man_section,
		'table-type=s'		=>	\$table_type,
		'docclass=s'		=>	\$latex_docclass,
		'papersize=s'		=>	\$papersize,
		'encoding=s'		=>	\$encoding,
		'dl'			=>	\$dl_as_dl,
		'no-time-sig'		=>	\$no_time_sig,
		'no-pure-verbatim'	=>	\$no_pure_verbatim
	) or $usage) {
	# bad command line or explicit -h|--help: show the help and exit
	usage();
}

# -v|--version: print the version string and stop
if ($version) {
	print "$VERSION\n";
	exit(0);
}

# Open the input. '-' (the default) means STDIN; any other value is
# taken as a literal file name. The three-argument open() is used so
# that names with leading/trailing blanks, or containing characters
# such as '|', '<' or '>', are never parsed as open modes or commands.
if ($input_file eq '-') {
	open(I, '<&', \*STDIN) or die "Can't open $input_file: $!";
}
else {
	open(I, '<', $input_file) or die "Can't open $input_file: $!";
}

# Open the output. '>-' (the default) and '-' both mean STDOUT; any
# other value is a literal file name that is created or truncated.
if ($output_file eq '-' or $output_file eq '>-') {
	open(O, '>&', \*STDOUT) or die "Can't create $output_file: $!";
}
else {
	open(O, '>', $output_file) or die "Can't create $output_file: $!";
}

# if utf8 encoding is wanted, set the filehandles as utf8
# so that regular expressions match all characters
if (defined($encoding) && $encoding =~ /^utf-?8/i) {
	binmode(I, ":utf8");
	binmode(O, ":utf8");
}

# read the whole input in one go
$content = join('', <I>);
close I;

# handed to Grutatxt by reference as 'title'; used further down as the
# fallback page title when none was given on the command line
$content_title = '';

# tab to space conversion is only done in LaTeX mode
if ($mode !~ /^latex$/i) {
	$tabsize = 0;
}

# build the converter from the command line settings
$grutatxt = Grutatxt->new(
	'mode'			=> $mode,
	'header-offset'		=> $header_offset,
	'table-headers'		=> $table_headers,
	'center-tables'		=> $center_tables,
	'expand-tables'		=> $expand_tables,
	'strip-parens'		=> $strip_parens,
	'strip-dollars'		=> $strip_dollars,
	'tabsize'		=> $tabsize,
	'dl-as-dl'		=> $dl_as_dl,
	'table-type'		=> $table_type,
	'title'			=> \$content_title,
	'abstract'		=> \$abstract,
	'page-name'		=> $title,
	'section'		=> $man_section,
	'docclass'		=> $latex_docclass,
	'papersize'		=> $papersize,
	'encoding'		=> $encoding,
	'no-pure-verbatim'	=> $no_pure_verbatim
);

# convert the text; the result is a list of output lines
@result = $grutatxt->process($content);

# an explicit title wins, otherwise use the one stored in $content_title
$title ||= $content_title;

# the <html>...</html> envelope is only written in HTML mode
$no_body = 1 if $mode !~ /^html$/i;

# HTML prologue: doctype, <head> with charset, title, optional time
# signature comment and optional stylesheet (linked or embedded)
unless ($no_body) {
	print O "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n";
	print O " \t\"http://www.w3.org/TR/REC-html40/loose.dtd\">\n";
	print O "<html><head>\n";
	print O "<meta http-equiv='Content-Type' content='text/html; charset=" .
		($encoding || 'iso-8859-1') . "'>\n";
	print O "<title>$title</title>\n";

	# the version is passed as an argument so that it can never be
	# mistaken for part of the printf format
	printf O "<!-- converted from text by grutatxt %s on %s -->\n",
		$VERSION, scalar(localtime)
		unless $no_time_sig;

	if ($css) {
		if ($embed_css) {
			# three-argument open on a lexical handle: the CSS
			# path is taken literally, never as a mode or command
			open(my $css_fh, '<', $css)
				or die "Can't open '$css' CSS file: $!";
			my $css_text = join('', <$css_fh>);
			close $css_fh;

			print O "<style type='text/css'>\n";
			print O $css_text . "\n";
			print O "</style>\n";
		}
		else {
			print O "<link rel=StyleSheet href='$css' type='text/css'>";
		}
	}

	print O "</head><body>\n";
}

# the converted document, one line per element of @result
foreach my $line (@result) {
	print O "$line\n";
}

print O "</body></html>\n" unless $no_body;

# buffered write errors (full disk, etc.) only show up when the handle
# is closed, so a failure here must not end with a success exit code
close(O) or die "Can't write $output_file: $!";

exit(0);


# usage - prints the help text to STDOUT and terminates the program.
#
# Takes no arguments. Never returns: it always ends with exit(0).
# The version banner interpolates the global $VERSION; the rest of the
# text is a non-interpolating here-document, so '$', '@' and '%' are
# printed literally.
sub usage
{
	print "grutatxt $VERSION - Grutatxt format processor\n";

	print <<'END_OF_USAGE';
Copyright (C) 2000/2008 Angel Ortega <angel@triptico.com>
This software is covered by the GPL license. NO WARRANTY.

Usage:

grutatxt [options] < input_text_file > output_html_file

Global options:

    -i|--input=FILE            Input file (STDIN)
    -o|--output=FILE           Output file (STDOUT)
    -t|--title=TITLE           Document title (if unset,
                               level 1 heading is used)
    -sp|--strip-parens         Strip parentheses in function
                               names (shown monospaced anyway)
    -sd|--strip-dollars        Strip leading $ in variable
                               names (shown monospaced anyway)
    -m|--mode=[HTML|troff|man|latex]
                               Output mode: HTML, troff, man or latex
                               (default: HTML)
    --no-time-sig              Avoid time signature in HTML comment
    --no-pure-verbatim         Disable pure verbatim mode

HTML options:

    -c|--css=CSS_URL_OR_FILE   CSS URL (or file if using --embed-css)
    -e|--embed-css             Embed CSS instead of linking to it
    -f|--header-offset=NUMBER  Offset to add to <h1>,
                               <h2>... headers (default 0)
    -b|--table-headers         Use <th> instead of <td> in
                               the first row of each table
    -ct|--center-tables        Centers the tables
    -xt|--expand-tables        Expands the tables (width=100%)
    -nb|--no-body              Don't generate <html><body>...
                               </body></html> enclosing.
    --encoding=ENCODING        Character encoding (default: iso-8859-1)
    -dl                        Use real <dl>, <dd> and <dt>
                               instead of tables in definition lists.

troff options:

    --table-type=TYPE          Table type. Possible values:
                               box, allbox, doublebox (default allbox)

man options:

    -s|--man-section=SECTION   Man page section (default: 1)

LaTeX options:

    --docclass=CLASS           Document class (default: report)
    --papersize=SIZE           Paper size (default: a4paper)
    --encoding=ENCODING        Character encoding (default: latin1)
    -ts|--tabsize=NUMBER       Tab size for tab to space conversions in
                               LaTeX verbatim environment (default: 8)
END_OF_USAGE

	exit(0);
}
