#!/usr/bin/perl -w
# vim: set sw=4 ts=4 si et:
# Copyright: GPL
# Author: Guido Socher, guido@bearix.oche.de
#
use strict;
use vars qw($opt_T $opt_C $opt_t $opt_k $opt_l $opt_h);
use Getopt::Std;
require 5.004;
#
# Version of lfparser; it is shown by the help text and written into the
# GENERATOR meta tag and the footer of every generated page.
my $ver = '1.5';
#
# Set of accepted article categories (category name => 1). The -k option
# lists them and evalarticle rejects any article whose category is not here.
my %validcat = map { ($_ => 1) } (
    'Forum',                 'Applications',         'Hardware',
    'Webdesign',             'System Administration', 'Software Development',
    'Graphics',              'Community',            'UNIX Basics',
    'Kernel Corner',         'Interviews',           'Games',
);
# You may change this line if you want another default language:
my $lang = 'en';
#
# CGI endpoints linked from the generated page: the "report a fault" link in
# the footer and the per-article talkback page.
my $lfcomment  = 'http://linuxfocus.linuxbox.com/cgi-bin/lfcomment';
my $lftalkback = 'http://www.linuxfocus.org/cgi-bin/lftalkback';
#
# Localized strings: language code => { string name => text }.
# Note: the entries are completed automatically. Only 'chset' is mandatory;
#       when a language is selected with -l, every key that does not exist
#       (or is empty) in that language is taken from English (en).
# 'chset' is written into the Content-Type meta tag and 'doct' into the
# DOCTYPE line of the generated page. The texts are stored in the byte
# encoding named by 'chset' of their language, so do not re-encode these lines.
my %intdat=(
 'de'=>{'chset'=>"iso-8859-1",'doct'=>'DE','abstract'=>'Zusammenfassung','content'=>'Inhalt','wwwresp'=>'Dem LinuxFocus-Team schreiben','aboutauthor'=>'&Uuml;ber den Autor','auth'=>'von','transinfo'=>'Autoren und &Uuml;bersetzer','lftalkback'=>'Talkback f&uuml;r diesen Artikel','talkbacktext'=>'Jeder Artikel hat seine eigene Seite f&uuml;r Kommentare und R&uuml;ckmeldungen. Auf dieser Seite kann jeder eigene Kommentare abgeben und die Kommentare anderer Leser sehen:','goto_talkback'=>'Talkback Seite'},
 'en'=>{'chset'=>"iso-8859-1",'doct'=>'EN','abstract'=>'Abstract','content'=>'Content','wwwresp'=>'Webpages maintained by the LinuxFocus Editor team','aboutauthor'=>'About the author','auth'=>'by','transinfo'=>'Translation information','home'=>'Home','map'=>'Map'=>,'index'=>'Index','search'=>'Search','news'=>'News','archives'=>'Archives','links'=>'Links','aboutus'=>'About LF','topmap'=>'Topbar-en.gif','botmap'=>'Bottombar-en.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]','lfcomment'=>'Click here to report a fault or send a comment to Linuxfocus', 'lftalkback'=>'Talkback form for this article','talkbacktext'=>'Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:','goto_talkback'=>'talkback page'},
 'es'=>{'chset'=>"iso-8859-1",'doct'=>'ES','abstract'=>'Resumen','content'=>'Contenidos','wwwresp'=>'Contactar con el equipo de LinuFocus','aboutauthor'=>'Sobre el autor','auth'=>'por','home'=>'Hogar','map'=>'Mapa'=>,'index'=>'Indice','search'=>'Busqueda','news'=>'Noticias','archives'=>'Arca','links'=>'Enlaces','aboutus'=>'Sobre LF','topmap'=>'Topbar-es.gif','botmap'=>'Bottombar-es.gif'},
 'fr'=>{'chset'=>"iso-8859-1",'doct'=>'FR','abstract'=>'R&eacute;sum&eacute;','content'=>'Sommaire','wwwresp'=>'Site Web maintenu par l&acute;&eacute;quipe d&acute;&eacute;dition LinuxFocus','aboutauthor'=>'L&acute;auteur','auth'=>'par','home'=>'Sommaire','map'=>'Carte'=>,'index'=>'Index','search'=>'Recherche','news'=>'Nouvelles','archives'=>'Archives','links'=>'Liens','aboutus'=>'A propos','topmap'=>'Topbar-fr.gif','botmap'=>'Bottombar-fr.gif','alttop'=>'[Barre Superieure]','altbot'=>'[Barre Inferieure]'},
 'nl'=>{'chset'=>"iso-8859-1",'doct'=>'NL','abstract'=>'Kort','content'=>'Inhoud','wwwresp'=>'Site onderhouden door het LinuxFocus editors team','aboutauthor'=>'Over de auteur','auth'=>'door','home'=>'Home','map'=>'Map'=>,'index'=>'Index','search'=>'Zoek','news'=>'Nieuws','archives'=>'Archieven','links'=>'Links','aboutus'=>'Over ons','topmap'=>'Topbar-nl.gif','botmap'=>'Bottombar-nl.gif','alttop'=>'[Hoofd-balk]','altbot'=>'[Voet-balk]', 'lfcomment'=>'Klik hier om een fout te melden of commentaar te geven', 'lftalkback'=>'Talkback voor dit artikel','talkbacktext'=>'Elk artikel heeft zijn eigen talkback pagina. Daar kan je commentaar geven of commentaar van anderen lezen:','goto_talkback'=>'ga naar de talkback pagina'},
 'gb'=>{'chset'=>"gb2312",'doct'=>'GB','abstract'=>'ÕªÒª','content'=>'ÕýÎÄ','wwwresp'=>'Ö÷Ò³ÓÉLinuxFocus±à¼­×éÎ¬»¤','aboutauthor'=>'¹ØÓÚ×÷Õß','auth'=>'by','transinfo'=>'·­ÒëÐÅÏ¢','home'=>'Home','map'=>'Map'=>,'index'=>'Index','search'=>'Search','news'=>'News','archives'=>'Archives','links'=>'Links','aboutus'=>'About LF','topmap'=>'Topbar-en.gif','botmap'=>'Bottombar-en.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]'},
 'jp'=>{'chset'=>"ISO-2022-JP"},
 'ko'=>{'chset'=>"EUC-KR"},
 'ru'=>{'chset'=>"koi8-r"},
 'tr'=>{'chset'=>"iso-8859-9"},
 'cn'=>{'chset'=>"Big-5"},
 );
#
# Languages for which the article text is run through htmlumlaute (Latin-1
# characters become HTML entities) when they are selected with -l.
# NOTE(review): 'it' has no entry in %intdat, so "-l it" is rejected before
# this table is consulted.
my %islatin = map { ($_ => 1) } ('de', 'fr', 'es', 'it');
# global data:
# Date string printed in the page footer; filled in at start-up.
my $today;
# Current state of the state machine in evalarticle.
my $parsestate = 0;
# Token stream produced by parse(): the two arrays run in parallel, one
# entry per token (token type / token content).
my (@parsedtypes, @parseddata);
#
# Pieces of the article collected from the command line and by evalarticle;
# printlf_format assembles the final page from them.
my ($articlename, $articlenumber, $articlecategory, $articletitle);
my ($articleauthorimg, $articleauthor, $articleauthorname);
my (@articletransinfo, @articleaboutauthor, @articleabstract, @articleindex);
my ($articleimage, $articlebody);
#
#
# The complete text of the input file.
my $text;
#
# ---- main program ----
# The subs below are called with a leading "&" on purpose: parse() and
# htmlumlaute() carry prototypes and are defined further down in the file,
# so a plain call would trigger "called too early to check prototype"
# under -w.
#
# Parse the command line; getopts() stores each switch in $opt_<letter>.
&getopts("TCkl:ht") || die "ERROR: No such option. -h for help.\n";
&help() if ($opt_h);
$today = &today();

# -k: list the valid categories and stop.
if ($opt_k){
    print "valid categories are:\n";
    foreach my $category (sort keys %validcat){
        print " $category\n";
    }
    exit(0);
}

# -l LANG: select the output language.
if ($opt_l){
    die "ERROR: invalid language specifier\n" unless($intdat{$opt_l}{'chset'});
    $lang = $opt_l;
    # copy keys from the english section that are not defined in this one:
    foreach my $key (keys %{$intdat{'en'}}){
        $intdat{$opt_l}{$key} = $intdat{'en'}{$key} unless ($intdat{$opt_l}{$key});
    }
}

# The article file is mandatory; without it only the help text is printed.
&help() unless ($ARGV[0]);

# Derive the article name (file name without "meta." and without directory)
# and the article number (first run of digits in that name, else 0).
$articlename = $ARGV[0];
$articlename =~ s/meta\.//;
# basename:
$articlename =~ s=^.*/==;
if ($articlename =~ /(\d+)/){
    $articlenumber = $1;
}else{
    $articlenumber = 0;
}

# Slurp the whole article. The explicit "< " read mode keeps a file name such
# as ">file" or "cmd|" from being taken as an open mode (truncating a file or
# running a command); the two-argument form is kept because the script only
# requires perl 5.004.
open (FF, "< $ARGV[0]") || die "ERROR: can not read file $ARGV[0]: $!\n";
$text = join "", <FF>;
close FF;
# _LF_ is the meta-syntax shorthand for the coloured LinuxFocus logo text.
$text =~ s/_LF_/Linux<font color=\"red\">Focus<\/font>/g;

# Latin-1 languages get their accented characters turned into HTML entities.
if ($opt_l && $islatin{$opt_l}){
    &htmlumlaute(\$text);
}

# tokenize -> interpret the tokens -> print the final page
&parse(\$text);
&evalarticle();
&printlf_format();

#-----
# Take the global data and print an article in LF format to STDOUT.
#
# Reads the $article* globals filled in by the main program and evalarticle,
# the localized strings in $intdat{$lang}, and the option flags:
#   $opt_t  insert a <BASE href=..> tag (test mode)
#   $opt_T  leave out the talkback link and talkback section
#   $opt_C  leave out the lfcomment link in the footer
# The talkback parts are only printed for article numbers above 100.
# Side effect: @articletransinfo is emptied while the translation table is
# printed. A warning goes to STDERR when the article has no index entries.
#
# The "&" separating query parameters in generated links is written as
# "&amp;": a bare "&" is not valid inside an HTML attribute value, and
# "&lang" can be misread as the start of an entity.
sub printlf_format{
    my $base="";
    if ($opt_t){
        $base="<BASE href=\"http://www.nl.linuxfocus.org/English/articles/\">";
    }
    my $with_talkback = (!$opt_T && $articlenumber > 100);
    my $talkback_url  = "$lftalkback?anum=$articlenumber&amp;lang=$lang";

    # Page head, navigation image maps and the start of the author box.
    print <<"END_OF_HEAD";
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//$intdat{$lang}{doct}">
<HTML>
<HEAD>
 <META http-equiv="Content-Type" content="text/html; charset=$intdat{$lang}{chset}">
 <META NAME="GENERATOR" CONTENT="lfparser_$ver">
 <!-- lfparser can be obtained from http://www.linuxfocus.org/developer/Guido/lfparser.html -->
 <META NAME="LFCATEGORY" CONTENT="$articlecategory">
 <TITLE>$articlecategory : $articletitle</TITLE>
 $base
</HEAD>
<BODY bgcolor="#ffffff"  text="#000000" alink="#336633" link="#336633" vlink="#336633">
<MAP name="top">
  <AREA shape="rect" coords="367,9,418,30" alt="$intdat{$lang}{home}" href="../index.html">
  <AREA shape="rect" coords="423,9,457,30" alt="$intdat{$lang}{'map'}" href="../map.html">
  <AREA shape="rect" coords="463,9,508,30" alt="$intdat{$lang}{'index'}" href="../indice.html">
  <AREA shape="rect" coords="514,9,558,30" alt="$intdat{$lang}{search}" href="../Search/">
</MAP>
<MAP name="bottom">
  <AREA shape="rect" coords="78,0,163,15" alt="$intdat{$lang}{news}" href="../News/">
  <AREA shape="rect" coords="189,0,284,15" alt="$intdat{$lang}{archives}" href="../Archives/">
  <AREA shape="rect" coords="319,0,395,15" alt="$intdat{$lang}{links}" href="../Links/">
  <AREA shape="rect" coords="436,0,523,15" alt="$intdat{$lang}{aboutus}" href="../aboutus.html">
</MAP>
<!-- IMAGE HEADER -->
<CENTER>
  <IMG src="../../common/images/$intdat{$lang}{topmap}" width="600" height="40" border="0" alt="$intdat{$lang}{alttop}" ismap usemap="#top" ><BR>
  <IMG src="../../common/images/$intdat{$lang}{botmap}" width="600" height="21" border="0" alt="$intdat{$lang}{altbot}" ismap usemap="#bottom">
</CENTER>
<!-- SSI_INFO -->

<!--#include virtual="../../dynahead.shtml" -->

<!-- SHORT BIO ABOUT THE AUTHOR -->
<TABLE ALIGN=LEFT BORDER=0 CELLSPACING=0 CELLPADDING=5 WIDTH="30%" >
<TR>
<TD><$articleauthorimg>
<BR>$intdat{$lang}{auth}  $articleauthor
<BR><BR>
END_OF_HEAD

    if (@articleaboutauthor){
        print "<I>$intdat{$lang}{aboutauthor}:</I><BR>\n";
        print join("", @articleaboutauthor), "\n";
    }

    # Table of contents: one entry per H2/H3 heading found in the body; the
    # lfindexN anchors are inserted into the body by parsebodyforindex.
    if (@articleindex){
        print "<P><FONT COLOR=\"#336633\">$intdat{$lang}{content}</FONT>:\n<UL>\n";
        my $anchor_no = 0;
        foreach my $heading (@articleindex){
            print "  <LI><A HREF=\"#lfindex$anchor_no\">$heading</A></LI>\n";
            $anchor_no++;
        }
        if ($with_talkback){
            print "  <LI><A HREF=\"$talkback_url\">".$intdat{$lang}{'lftalkback'}."</A></LI>\n";
        }
        print "</UL>\n</P>";
    }else{
        print STDERR "Warning: could not generate an article index\n";
    }

    # Title, illustration, abstract and the article body itself.
    print "\n</TD></TR></TABLE>\n<!-- HEAD OF THE ARTICLE -->\n";
    print "<H2>$articletitle</H2>\n $articleimage";
    print "<P><FONT COLOR=\"#336633\">$intdat{$lang}{abstract}</FONT>:\n<P>\n";
    print join("", @articleabstract);
    print "</P><HR size=\"2\" noshade><BR>\n";
    print "<!-- BODY OF THE ARTICLE -->\n";
    print "$articlebody\n";

    # Talkback section with a button-like link to the talkback page.
    print <<"END_OF_TALKBACK" if ($with_talkback);
<A NAME="talkback">&nbsp;</a>
<h2>$intdat{$lang}{lftalkback}</h2>
$intdat{$lang}{talkbacktext}
<center>
<table border="0"  CELLSPACING="2" CELLPADDING="1">
 <tr BGCOLOR="#C2C2C2"><td align=center>
  <table border="3"  CELLSPACING="2" CELLPADDING="1">
   <tr BGCOLOR="#C2C2C2"><td align=center>
    <A href="$talkback_url"><b>&nbsp;$intdat{$lang}{goto_talkback}&nbsp;</b></a>
   </td></tr></table>
</td></tr></table>
</center>

END_OF_TALKBACK

    print "<HR size=\"2\" noshade>\n";
    print <<"END_OF_FOOT";
<!-- ARTICLE FOOT -->
<CENTER><TABLE WIDTH="95%">
<TR><TD ALIGN=CENTER BGCOLOR="#777777">
<A HREF="../../common/lfteam.html"><FONT COLOR="#FFFFFF">$intdat{$lang}{wwwresp}</FONT></A>
<BR><FONT COLOR="#FFFFFF">&copy; $articleauthorname <BR>LinuxFocus.org 2000</FONT>
END_OF_FOOT

    if ($opt_C){
        print "</TD>\n";
    }else{
        print "<BR><a href=\"${lfcomment}?lang=${lang}&amp;article=$articlename\" target=\"_TOP\"><FONT COLOR=\"#FFFFFF\">$intdat{$lang}{lfcomment}</FONT></A><BR></TD>\n";
    }

    # @articletransinfo holds triples (from, to, linked name). The first
    # triple is the original author (its "to" field is "--"), so more than
    # three entries means that at least one translation is recorded.
    if (scalar(@articletransinfo)>3){
        print "<TD BGCOLOR=\"#777777\"><!-- TRANSLATION INFO -->\n";
        print "<font size=2>$intdat{$lang}{transinfo}:</font><TABLE>\n";
        while(@articletransinfo){
            my ($from, $to, $who) = splice(@articletransinfo, 0, 3);
            print "<tr><td><font size=2>$from</font></td>\n";
            print "    <td><font size=2>-&gt;</font></td>\n";
            print "    <td><font size=2>$to</font></td>\n";
            print "    <td><font size=2>$who</font></td>\n";
            print "</tr>\n";
        }
        print "</TABLE></TD>\n";
    }else{
        print "<!-- OLD FORMAT, NO TRANSLATION INFO -->\n";
    }
    print "</TR></TABLE></CENTER>\n";
    print "<p><font size=1>$today, generated by lfparser version $ver</font></p>\n</BODY>\n</HTML>\n";

}

#-----
# Handle the parsed text chunks.
#
# Walks the token stream produced by parse() (@parsedtypes / @parseddata, in
# parallel) and fills the $article* / @article* globals. The article header
# must come in a fixed order; the global $parsestate records how far we are:
#    0  expect <H1>: the article title
#    1  expect <H4>ArticleCategory
#    2  expect plain text: the category
#    3  expect an <H4> heading (the AuthorImage heading; its text is not checked)
#    4  expect an <img ..> tag: the author image
#    5  expect <H4>AuthorName (-> 6, old format) or <H4>TranslationInfo (-> 7)
#    6  expect an anchor with the author's name (-> 8)
#    7  read the TranslationInfo paragraphs until <H4>AboutTheAuthor (-> 9)
#    8  expect <H4>AboutTheAuthor
#    9  collect the "about the author" HTML until <H4>Abstract
#   10  collect the abstract HTML until <H4>ArticleIllustration
#   11  expect an <img ..> tag: the article illustration
#   12  expect the Body token (everything after the ArticleBody heading)
# Any deviation from this order ends the program with die and an error message.
# After the loop the body is indexed with parsebodyforindex and the category
# is checked against %validcat (exit 1 when it is not a valid category).
sub evalarticle{
    # index into @parseddata of the token currently examined
    my $i=0;
    my $type;
    my $content; 
    # sub-state inside TranslationInfo (state 7): 0 = expect "original in
    # LANG", 1 = expect the author anchor, then even = expect "LANG1 to
    # LANG2" (or the next heading) and odd = expect the translator anchor
    my $transinfostate=0; 
    # states in which we ignore <P> <BR> </P>
    my %ignorePandBR=(1=>1,2=>1,3=>1,4=>1,5=>1,6=>1,7=>1,8=>1,11=>1,12=>1);
    for $type (@parsedtypes){
        # remove empty text and &nbsp; which is inserted by WYSIWYG editors
        $parseddata[$i]=~ s/\&nbsp\;//g if ($type eq "Text");
        if ($type eq "Text" && $parseddata[$i]=~ /^[\r\n\t ]+$/){
            $i++; next;
        }
        # dbg, debug:
        #print "-- $parsestate: $parseddata[$i] type: $type --\n";
        # start of article, search for heading:
        # (in state 0 every token that is not a heading is skipped at the
        # bottom of the loop)
        if ($parsestate==0 && $type=~/HeadingLevelTag/){
            if ($type eq "HeadingLevelTag1"){
                $articletitle=$parseddata[$i];
                $articletitle=~s/\s+/ /g;
                $parsestate++;
            }else{
                die "ERROR: The first heading must be the title of the article on level 1. Note: you may not have \"_LF_\" or nested tags in the title.\n";
            }
            $i++; next;
        }
        # ignoring of <P>, <BR>, </P> in certain states:
        if ($ignorePandBR{$parsestate}){
            if ($type eq "StartTag" && $parseddata[$i] =~/^P$/i){ $i++; next;}
            if ($type eq "StartTag" && $parseddata[$i] =~/^br$/i){ $i++; next;}
            if ($type eq "EndTag" && $parseddata[$i] =~/^\/P$/i){ $i++; next;}
        }
        # start of article, search for ArticleCategory:
        if ($parsestate==1){
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleCategory/){
                $parsestate++;
            }else{
                die "ERROR: The second heading must be ArticleCategory on level 4\n";
            }
            $i++; next;
        }
        #--
        # looking for the category
        if ($parsestate==2){
            if ($type eq "Text"){
                # collapse runs of white space and trim both ends
                $articlecategory=$parseddata[$i];
                $articlecategory=~s/\s+/ /g;
                $articlecategory=~s/^\s+//g;
                $articlecategory=~s/\s+$//g;
                $parsestate++;
            }else{
                die "ERROR: The heading ArticleCategory must be followed by a text plain string without tags\n";
            }
            $i++; next;
        }
        #--
        # looking for the image heading
        if ($parsestate==3){
            if ($type eq "HeadingLevelTag4"){
                $parsestate++;
            }else{
                die "ERROR: The 3-rd heading must be AuthorImage after ArticleCategory description\n";
            }
            $i++; next;
        }
        #--
        # looking for the image 
        if ($parsestate==4){
            if ($type eq "StartTag" && $parseddata[$i]=~/img/i){
                $parsestate++;
                # stored without the angle brackets; printlf_format adds them
                $articleauthorimg=$parseddata[$i];
            }else{
                die "ERROR: Image of author missing after AuthorImage heading\n";
            }
            $i++; next;
        }
        #--
        # looking for the AuthorName
        if ($parsestate==5){
            # the old format is AuthorName the new is TranslationInfo
            # and they are mutual exclusive
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AuthorName/){
                $parsestate=6;
            }elsif ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/TranslationInfo/){
                $parsestate=7;
            }else{
                die "ERROR: AuthorName or TranslationInfo must be the heading after the Image\n";
            }
            $i++; next;
        }
        #--
        # looking for the name and e-mail or home-page 
        if ($parsestate==6){
            if ($type eq "AnchorTag"){
                # the whole anchor is kept for the author box; the plain
                # name is extracted for the copyright line in the footer
                $articleauthor="<" . $parseddata[$i] . ">";
                if ($parseddata[$i]=~/\" *>([\w\&\;\.\,\-\s]+)<\//){
                    $articleauthorname=$1;
                }else{
                    die "ERROR: please write the name of the article author such tat it can be easily written in other languages (Letters A-Z and &Uuml; &eacute; &egrave; etc..)\n";
                }
                $parsestate=8;
            }else{
                die "ERROR: AuthorName must followed by an anchor tag\n";
            }
            $i++; next;
        }
        #--
        # looking for the name and e-mail or home-page 
        # parse the TranslationInfo pre-tag:
        # @articletransinfo receives three entries per person:
        # (source language, target language or "--", linked name)
        if ($parsestate==7){
            if ($transinfostate == 0){
                if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){
                    $transinfostate++;
                    die "ERROR: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'});
                    push(@articletransinfo,$1);
                    push(@articletransinfo,"--");
                    $i++; next;
                }else{
                    die "ERROR1: in $parseddata[$i]: TranslationInfo must be followed by pargraph that looks like: <p>original in LANG <a href=\"mailto:....\">Author Name</a></p>\n";
                }
            }
            if ($transinfostate == 1){
                if ($type eq "AnchorTag"){
                    $articleauthor="<" . $parseddata[$i] . ">";
                    $transinfostate++;
                    # $1 = link target, $2 = author name
                    if ($parseddata[$i]=~/=[\'\"]([^\"\']+)[\'\"] *>([\w\&\;\.\,\-\s]+)<\//){
                        $articleauthorname=$2;
                    }else{
                        die "ERROR2: please write the name of the article author such tat it can be easily written in other languages (Letters A-Z and &Uuml; &eacute; &egrave; etc..)\n";
                    }
                    push(@articletransinfo,"<a href=\"$1\"><FONT COLOR=\"#FFFFFF\">$2</FONT></a>");
                    $i++; next;
                }else{
                    die "ERROR3: TranslationInfo must be followed by pargraph that looks like: <p>original in LANG <a href=\"mailto:....\">Author Name</a></p>\n";
                }
            }
            if (($transinfostate % 2) == 0){
                if($type eq "Text" && $parseddata[$i]=~/(\w+) +to +(\w+)/i){
                    $transinfostate++;
                    # only the source language ($1) is checked here
                    die "ERROR4: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'});
                    push(@articletransinfo,$1);
                    push(@articletransinfo,$2);
                    $i++; next;
                # looking for the AboutTheAuthor
                }elsif ($type eq "HeadingLevelTag4"){
                    # here we look also for the next heading:
                    if ($parseddata[$i]=~/AboutTheAuthor/){
                        $parsestate=9;
                        die "ERROR7: TranslationInfo not complete\n" unless((scalar(@articletransinfo) %3) ==0);
                    }else{
                        die "ERROR8: The heading after TranslationInfo must be AboutTheAuthor\n";
                    }
                    $i++; next;
                }else{
                    die "ERROR5: in $parseddata[$i]: TranslationInfo must have a pargraph that looks like: <p>LANG1 to LANG2<a href=\"mailto:....\">Translator Name</a></p>\nAdditional &nbsp; and other things are not allowed\n";
                }
            }
            if (($transinfostate % 2) == 1){
                if ($type eq "AnchorTag"){
                    $transinfostate++;
                    if ($parseddata[$i]=~/=[\'\"]([^\"\']+)[\'\"] *>([\w\&\;\.\,\-\s]+)<\//){
                    push(@articletransinfo,"<a href=\"$1\"><FONT COLOR=\"#FFFFFF\">$2</FONT></a>");
                    }else{
                        die "ERROR2: please write the name in TranslationInfo ($parseddata[$i]) such that it can be easily written in other languages (Letters A-Z and &Uuml; &eacute; &egrave; etc..)\n";
                    }
                    $i++; next;
                }else{
                    die "ERROR6: TranslationInfo must have a pargraph that looks like: <p>LANG1 to LANG2<a href=\"mailto:....\">Translator Name</a></p>\n";
                }
            }
            $i++; next;
        }
        #--
        # looking for the AboutTheAuthor when there is no TranslationInfo
        if ($parsestate==8){
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AboutTheAuthor/){
                $parsestate++;
            }else{
                die "ERROR: The heading after AuthorName must be AboutTheAuthor\n";
            }
            $i++; next;
        }
        #--
        # reading about the author (html text without heading)
        if ($parsestate==9){
            if ($type=~/HeadingLe/){
                if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/Abstract/){
                    $parsestate++;
                }else{
                    die "ERROR: The heading after the \"about the author\" paragraph must be the Abstract\n";
                }
            }else{
                # reading any html:
                # (tags are stored by parse() without their angle brackets,
                # so they are put back here)
                if ($type=~/Tag/){
                    push(@articleaboutauthor,"<" . $parseddata[$i] . ">");
                }elsif ($type eq "Text"){
                    push(@articleaboutauthor,$parseddata[$i]);
                }else{
                    die "Programm error, unknown type $type in about author\n";
                }
            }
            $i++; next;
        }
        #--
        # reading the abstract (html text without heading)
        if ($parsestate==10){
            if ($type=~/HeadingLe/){
                if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleIllustration/){
                    $parsestate++;
                }else{
                    die "ERROR: The heading after the abstract paragraph must be ArticleIllustration\n";
                }
            }else{
                # reading any html:
                if ($type=~/Tag/){
                    push(@articleabstract,"<" . $parseddata[$i] . ">");
                }elsif ($type eq "Text"){
                    push(@articleabstract,$parseddata[$i]);
                }else{
                    die "Programm error, unknown type $type in abstract\n";
                }
            }
            $i++; next;
        }
        #--
        # looking for the article illustration 
        if ($parsestate==11){
            if ($type eq "StartTag" && $parseddata[$i]=~/img/i){
                $parsestate++;
                $articleimage="<" . $parseddata[$i] . ">";
            }else{
                die "ERROR: Image of article missing after ArticleIllustration heading\n";
            }
            $i++; next;
        }
        #--
        # looking for the ArticleBody is already checked in the parser:
        if ($parsestate==12){
            if ($type eq "Body"){
                $articlebody=$parseddata[$i];
                # drop index anchors left over from an earlier run; they
                # are generated again by parsebodyforindex
                $articlebody=~s|<A NAME="lfindex\d+">&nbsp;</A>||g;
            }else{
                die "Program error: state 12 but tag-type $type instead of ArticleBody\n";
            }
            $i++; next;
        }
        #--
        $i++;
    }
    die "ERROR: invalid article meta-format, debug state $parsestate. Either you do not have a <H1> at the beginning or there is still a bug in lfparser.\n" unless ($parsestate == 12);
    # insert the index anchors into the body and fill @articleindex
    &parsebodyforindex(\$articlebody);
    unless ($validcat{$articlecategory}){
        print STDERR "ERROR invalid article category $articlecategory\n";
        print STDERR "valid categories are:\n";
        foreach (keys %validcat){
            print STDERR " - \"$_\"\n";
        }
        exit 1;
    }
}
#-----
# Generate an index for the article body.
# Takes a reference to the body text and scans it from the current match
# position. Every <H2>..</H2> and <H3>..</H3> (tags without attributes) is
# preceded by an anchor <A NAME="lfindexN"> and its heading text is appended
# to @articleindex; closing </HTML> and </BODY> tags are dropped; everything
# else is copied unchanged. The rebuilt text is stored in the global
# $articlebody, and afterwards all tags are removed from the index entries.
sub parsebodyforindex($){
    my $src = shift;
    my @pieces;
    my $anchor_no = 0;
    SCAN: while (1) {
        # plain text up to the next "<" is copied as it is
        if ($$src =~ /\G([^<]+)/gcs) {
            push(@pieces, $1);
            next SCAN;
        }
        # the page footer is added later, so these closing tags are dropped
        next SCAN if ($$src =~ /\G<\/HTML>/igcs);
        next SCAN if ($$src =~ /\G<\/body>/igcs);
        # a heading of level 3 or 2 with its matching end tag
        if ($$src =~ /\G<[hH]([32])>(.+?)<\/[hH]\1>/gcs) {
            my ($level, $heading) = ($1, $2);
            push(@pieces, "<A NAME=\"lfindex$anchor_no\">&nbsp;</A>\n<H$level>" . $heading . "</H$level>\n");
            push(@articleindex, $heading);
            $anchor_no++;
            next SCAN;
        }
        # any other tag is copied as it is
        if ($$src =~ m|\G(<[^>]*>)|gcs) {
            push(@pieces, $1);
            next SCAN;
        }
        # the string is exhausted, or there's no > in it.
        last SCAN;
    }
    # index entries are shown as link text, so strip tags nested in headings
    foreach my $entry (@articleindex){
        $entry =~ s/<.+?>//g;
    }
    $articlebody = join("", @pieces);
}
#-----
# Tokenize the html file and store the result in @parseddata, @parsedtypes.
# parse takes a ref to a text string as argument.
#
# One entry is appended to both arrays per token. Token types are:
#   Text              text between tags
#   Markup            <!...> declarations (comments are skipped entirely)
#   EndTag            </x ...>, stored without the angle brackets
#   AnchorTag         a complete <a ...>text</a>, stored without the outer < >
#   HeadingLevelTagN  <hN>text</hN>; only the heading text is stored
#   StartTag          any other tag, stored without the angle brackets
#   Body              everything after the <H4> heading that contains
#                     "ArticleBody"; scanning stops there
sub parse($){
    my $src = shift;
    my ($kind, $data);
    TOKEN: while (1) {
        # First we try to pull off any plain text (anything before a "<" char)
        if ($$src =~ /\G([^<]+)/gcs) {
            ($kind, $data) = ('Text', $1);
        } elsif ($$src =~ /\G<(!--.*?--)>/gcs) {
            # we ignore comments except if they are in the article body:
            next TOKEN;
        } elsif ($$src =~ /\G<(!.*?)>/gcs) {
            ($kind, $data) = ('Markup', $1);
        # Then, look for an end tag
        } elsif ($$src =~ m|\G<(/[a-zA-Z][^<]*?)>|gcs) {
            ($kind, $data) = ('EndTag', $1);
        # Look for a <a ..> ..</a> tag:
        } elsif ($$src =~ /\G<([aA] [^>]+>([^<]+)<\/[aA])>/gcs) {
            ($kind, $data) = ('AnchorTag', $1);
        # Look for a h[0-9] tag:
        } elsif ($$src =~ /\G<[hH](\d)>([^<]+)<\/[hH]\d>/gcs) {
            my $level = $1;
            ($kind, $data) = ("HeadingLevelTag$level", $2);
            if ($level eq "4" && index($data, "ArticleBody") > -1){
                # the rest of the input, starting right behind the heading,
                # is the article body and is not tokenized any further
                push(@parseddata, substr($$src, pos($$src)));
                push(@parsedtypes, "Body");
                last TOKEN;
            }
        # Then, finally we look for a start tag
        # We know the first char is <, make sure there's a >
        } elsif ($$src =~ /\G<(.+?)>/gcs) {
            ($kind, $data) = ('StartTag', $1);
        } else {
            # the string is exhausted, or there's no > in it.
            last TOKEN;
        }
        push(@parseddata, $data);
        push(@parsedtypes, $kind);
    }
}
#--------------
# Replace accented ISO-8859-1 (Latin-1) characters by their HTML entities.
# Takes a reference to the text and changes the text in place.
#
# The characters are written as \xNN code points so that the sub does not
# depend on the encoding the script file itself is saved in. Compared to the
# earlier list the table also covers the counterparts that were missing
# (AElig, iuml, otilde, oslash, yacute, yuml), so that for example both
# "I-umlaut" forms are converted instead of only the upper-case one.
# Characters that are not in the table are left untouched.
sub htmlumlaute($){
    my $txt_ptr=shift;
    my %entity_for=(
        "\xA1"=>'iexcl',  "\xBF"=>'iquest',
        "\xC0"=>'Agrave', "\xC1"=>'Aacute', "\xC2"=>'Acirc',  "\xC3"=>'Atilde',
        "\xC4"=>'Auml',   "\xC5"=>'Aring',  "\xC6"=>'AElig',  "\xC7"=>'Ccedil',
        "\xC8"=>'Egrave', "\xC9"=>'Eacute', "\xCA"=>'Ecirc',  "\xCB"=>'Euml',
        "\xCC"=>'Igrave', "\xCD"=>'Iacute', "\xCE"=>'Icirc',  "\xCF"=>'Iuml',
        "\xD1"=>'Ntilde',
        "\xD2"=>'Ograve', "\xD3"=>'Oacute', "\xD4"=>'Ocirc',  "\xD5"=>'Otilde',
        "\xD6"=>'Ouml',   "\xD8"=>'Oslash',
        "\xD9"=>'Ugrave', "\xDA"=>'Uacute', "\xDB"=>'Ucirc',  "\xDC"=>'Uuml',
        "\xDD"=>'Yacute', "\xDF"=>'szlig',
        "\xE0"=>'agrave', "\xE1"=>'aacute', "\xE2"=>'acirc',  "\xE3"=>'atilde',
        "\xE4"=>'auml',   "\xE5"=>'aring',  "\xE6"=>'aelig',  "\xE7"=>'ccedil',
        "\xE8"=>'egrave', "\xE9"=>'eacute', "\xEA"=>'ecirc',  "\xEB"=>'euml',
        "\xEC"=>'igrave', "\xED"=>'iacute', "\xEE"=>'icirc',  "\xEF"=>'iuml',
        "\xF1"=>'ntilde',
        "\xF2"=>'ograve', "\xF3"=>'oacute', "\xF4"=>'ocirc',  "\xF5"=>'otilde',
        "\xF6"=>'ouml',   "\xF8"=>'oslash',
        "\xF9"=>'ugrave', "\xFA"=>'uacute', "\xFB"=>'ucirc',  "\xFC"=>'uuml',
        "\xFD"=>'yacute', "\xFF"=>'yuml',
    );
    # one pass over the text: look up every character of the upper Latin-1
    # range and keep it unchanged when the table has no entity for it
    $$txt_ptr=~s/([\xA1-\xFF])/exists($entity_for{$1}) ? "&$entity_for{$1};" : $1/ge;
}
#--------------
# Return the current local date as a string in yyyy-mm-dd format.
sub today{
    # localtime fields: [3] day of month, [4] month counted from 0,
    # [5] years since 1900
    my ($mday, $month, $year) = (localtime)[3, 4, 5];
    return sprintf("%04d-%02d-%02d", $year + 1900, $month + 1, $mday);
}
#-----
# Print the usage text (with the lfparser version) to STDOUT and exit.
# Called for -h and when no article file is given on the command line.
# The usage line names every switch accepted by getopts ("TCkl:ht") and
# every language that has an entry in %intdat.
sub help{
    print <<"END_OF_HELP";
lfparser -- parse a LinuxFocus article in HTML meta syntax and
generate a final LinuxFocus article. The HTML meta syntax is described
in http://www.linuxfocus.org/developer/Guido/lfparser.html
It is a special HTML format that can easily be edited and converted to
the released article format. It gives LinuxFocus the flexibility to change
the layout without editing all articles.

USAGE: lfparser [-hkCtT][-l cn|de|en|es|fr|gb|jp|ko|nl|ru|tr] article.meta.html > article.html
OPTIONS: -h this help
         -C do not generate a link to lfcomment
         -l select a language for the output
         -k list all valid categories and exit
         -T do not include talkback
         -t test mode. This inserts a <BASE href=..> into the
            article to include the images and other stuff from
            ../../common/ without the need to have them locally available.
            This option must not be used for the final article.

EXAMPLE: lfparser -l fr article.meta.html > article.html

This is lfparser version: $ver
END_OF_HELP

    exit;
}
__END__ 


