#!/usr/bin/perl -w
# vim: set sw=4 ts=4 si et nowrap:
# Copyright: GPL
# Author: Guido Socher, guido at linuxfocus.org
#
no locale;
use strict;
use vars qw($opt_f $opt_p $opt_P $opt_s $opt_o $opt_T $opt_C $opt_t $opt_k $opt_l $opt_h $opt_v);
use Getopt::Std;
use IO::Handle;
require 5.004;
# prototypes:
# Forward declarations of subroutines in package main. Several of them are
# called by the main script before their definitions appear further down,
# so the prototypes have to be known in advance.
# An empty prototype "()" declares a sub taking no arguments, "($)" a sub
# taking exactly one scalar argument (here always a reference).
sub main::help();
sub main::today();
sub main::check_for_lfparsercfg($);
sub main::htmlumlaute($);
sub main::parse($);
sub main::printlf_format();
#
# Default language of the generated article.
# Instead of editing the line below it is much better to create a file
# called .lfparsercfg in your home directory containing
#   lang = en
# because that setting survives every upgrade of lfparser.
my $lang = 'en';

# Default header (navigation bar) style.
my $style = 2;

# Receives the key/value pairs found in ~/.lfparsercfg.
my %config;

# Version of lfparser, printed by -v and written into the generated page.
my $ver = '2.51';

# Set of known article categories (listed by the -k option).
my %validcat = map { $_ => 1 } qw(
    Forum                Applications        Hardware   Webdesign
    SystemAdministration SoftwareDevelopment Graphics
    Community            UNIXBasics          KernelCorner Interviews
    Games
);

# URL of the talkback CGI the generated page links to.
my $lftalkback = 'http://cgi.linuxfocus.org/cgi-bin/lftalkback';
#
# Per-language text table, keyed by the language code given with -l or in
# ~/.lfparsercfg. Each entry maps a text key ('abstract', 'home', 'auth',
# 'TranslatedToThisLangBy', ...) to the string printed for that language.
# 'chset' is the charset written into the Content-Type META tag of the
# generated page and is the only mandatory key: a language without 'chset'
# is rejected as invalid.
# Note: the table is completed automatically at run time. If a key does not
#       exist for the selected language it is taken from English (en). The
#       special forms 'aboutauthorfemale', 'aboutauthormplural' and
#       'aboutauthorfplural' fall back to the language's own 'aboutauthor'
#       first, when there is one.
# NOTE(review): the entries for non UTF-8 charsets appear to hold raw bytes in
#       the charset named by 'chset' -- confirm before re-encoding this file.
my %intdat=(
 'ar'=>{'chset'=>"windows-1256",'abstract'=>' ','content'=>'','wwwresp'=>'    ','aboutauthor'=>'  ','auth'=>'','transinfo'=>'  ','home'=>'','map'=>'','index'=>'','search'=>'','news'=>'','archives'=>'','links'=>'','aboutus'=>' ','topmap'=>'Topbar-ar.gif','botmap'=>'Bottombar-ar.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]', 'lftalkback'=>'   ','talkbacktext'=>'    ʡ       . ','goto_talkback'=>' ','TranslatedToThisLangBy'=>'  '},
 'de'=>{'chset'=>"iso-8859-1",'abstract'=>'Zusammenfassung','content'=>'Inhalt','wwwresp'=>'Der LinuxFocus Redaktion schreiben','aboutauthorfemale'=>'&Uuml;ber die Autorin','aboutauthormplural'=>'&Uuml;ber die Autoren','aboutauthor'=>'&Uuml;ber den Autor','auth'=>'von','home'=>'Home','issueindex'=>'Zum index dieser Ausgabe','map'=>'Plan','index'=>'Index','search'=>'Suchen','news'=>'Nachrichten','archives'=>'Archiv','links'=>'Links','aboutus'=>'&Uuml;ber uns','transinfo'=>'Autoren und &Uuml;bersetzer','lftalkback'=>'Talkback f&uuml;r diesen Artikel','talkbacktext'=>'Jeder Artikel hat seine eigene Seite f&uuml;r Kommentare und R&uuml;ckmeldungen. Auf dieser Seite kann jeder eigene Kommentare abgeben und die Kommentare anderer Leser sehen:','goto_talkback'=>'Talkback Seite','topmap'=>'Topbar-de.gif','botmap'=>'Bottombar-de.gif','TranslatedToThisLangBy'=>'&Uuml;bersetzt ins Deutsche von'},
 'en'=>{'chset'=>"iso-8859-1",'abstract'=>'Abstract','content'=>'Content','wwwresp'=>'Webpages maintained by the LinuxFocus Editor team','aboutauthorfemale'=>'About the author','aboutauthormplural'=>'About the authors','aboutauthorfplural'=>'About the authors','aboutauthor'=>'About the author','auth'=>'by','home'=>'Home','up'=>'&lt;--','issueindex'=>'Go to the index of this issue','map'=>'Map','index'=>'Index','search'=>'Search','news'=>'News','archives'=>'Archives','links'=>'Links','aboutus'=>'About LF','transinfo'=>'Translation information','topmap'=>'Topbar-en.gif','botmap'=>'Bottombar-en.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]','lftalkback'=>'Talkback form for this article','talkbacktext'=>'Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:','goto_talkback'=>'talkback page','TranslatedToThisLangBy'=>'Translated to English by','proofread'=>'proof read'},
 'it'=>{'chset'=>"iso-8859-1",'issueindex'=>'Indice di questo numero','abstract'=>'Premessa','content'=>'Contenuto','auth'=>' ','aboutauthor'=>'L\'autore','home'=>'Home','map'=>'Mappa','index'=>'Indice','search'=>'Cerca','news'=>'News','archives'=>'Archivo','links'=>'Link','aboutus'=>'Cose LF','lftalkback'=>'Discussioni su quest\'articolo','talkbacktext'=>'ogni articolo possiede una sua pagina di discussione, da questa pagina puoi inviare un commento o leggere quelli degli altri lettori:','goto_talkback'=>'pagina di discussione','TranslatedToThisLangBy'=>'Tradotto in Italiano da'},
 'id'=>{'chset'=>"iso-8859-1",'TranslatedToThisLangBy'=>'Diterjemahkan Ke Indonesia Oleh','abstract'=>'Abstrak', 'content'=>'Daftar Isi', 'wwwresp'=>'Halaman Web Dirawat oleh Team Editor LinuxFocus ', 'aboutauthor'=>'Tentang Penulis', 'auth'=>'oleh', 'home'=>'Home', 'issueindex'=>' kembali ke indeks dari pembicaraan ini', 'map'=>' Peta', 'index'=>'Indeks', 'search'=>'Cari', 'news'=>'Berita', 'archives'=>'Arsip', 'links'=>'Links', 'aboutus'=>'Tentang LF', 'transinfo'=>'Informasi Terjemahan', 'lftalkback'=>'Komentar Balik Untuk Artikel ini', 'talkbacktext'=>'Setiap Artikel mempunyai halaman komentar sendiri. Pada halaman ini Anda bisa mengirimkan komentar atau melihat komentar dari pembaca lainnya:', 'goto_talkback'=>'Halaman Komentar'},
 'es'=>{'chset'=>"iso-8859-1",'issueindex'=>'&Iacute;ndice de este n&uacute;mero','abstract'=>'Resumen','content'=>'Contenidos','wwwresp'=>'Contactar con el equipo de LinuFocus','aboutauthor'=>'Sobre el autor','auth'=>'por','transinfo'=>'Informaci&oacute;n sobre la traducci&oacute;n','home'=>'Hogar','map'=>'Mapa','index'=>'Indice','search'=>'Busqueda','news'=>'Noticias','archives'=>'Arca','links'=>'Enlaces','aboutus'=>'Sobre LF','topmap'=>'Topbar-es.gif','botmap'=>'Bottombar-es.gif','lftalkback'=>'Formulario de "talkback" para este art&iacute;culo','talkbacktext'=>'Cada art&iacute;culo tiene su propia p&aacute;gina de "talkback". A trav&eacute;s de esa p&aacute;gina puedes enviar un comentario o consultar los comentarios de otros lectores','goto_talkback'=>'Ir a la p&aacute;gina de "talkback"','TranslatedToThisLangBy'=>'Traducido al espa&ntilde;ol por'},
 'fr'=>{'chset'=>"iso-8859-1",'issueindex'=>'Sommaire de ce num&eacute;ro','abstract'=>'R&eacute;sum&eacute;','content'=>'Sommaire','wwwresp'=>'Site Web maintenu par l&acute;&eacute;quipe d&acute;&eacute;dition LinuxFocus','aboutauthor'=>'L&acute;auteur','auth'=>'par','home'=>'Sommaire','map'=>'Carte','index'=>'Index','search'=>'Recherche','news'=>'Nouvelles','archives'=>'Archives','links'=>'Liens','aboutus'=>'A propos','topmap'=>'Topbar-fr.gif','botmap'=>'Bottombar-fr.gif','alttop'=>'[Barre Superieure]','altbot'=>'[Barre Inferieure]','TranslatedToThisLangBy'=>'Traduit en Franais par'},
 'nl'=>{'chset'=>"iso-8859-1",'issueindex'=>'Terug naar de titelpagina van dit nummer','abstract'=>'Kort','content'=>'Inhoud','wwwresp'=>'Site onderhouden door het LinuxFocus editors team','aboutauthor'=>'Over de auteur','auth'=>'door', 'transinfo'=>'Vertaling info','home'=>'Home','map'=>'Map','index'=>'Index','search'=>'Zoek','news'=>'Nieuws','archives'=>'Archieven','links'=>'Links','aboutus'=>'Over LF','topmap'=>'Topbar-nl.gif','botmap'=>'Bottombar-nl.gif','alttop'=>'[Hoofd-balk]','altbot'=>'[Voet-balk]','lftalkback'=>'Talkback voor dit artikel','talkbacktext'=>'Elk artikel heeft zijn eigen talkback pagina. Daar kan je commentaar geven of commentaar van anderen lezen:','goto_talkback'=>'talkback pagina','TranslatedToThisLangBy'=>'Vertaald naar het Nederlands door'},
 'gb'=>{'chset'=>"gb2312", 'abstract'=>'ժҪ', 'content'=>'Ŀ¼', 'wwwresp'=>'ҳLinuxFocus༭ά', 'aboutauthor'=>'', 'auth'=>'by', 'home'=>'ҳ', 'map'=>'վͼ', 'index'=>'', 'search'=>'', 'news'=>'', 'archives'=>'ڿ', 'links'=>'', 'aboutus'=>'LF', 'transinfo'=>'Ϣ', 'lftalkback'=>'ƪ·', 'talkbacktext'=>'ÿƪ¶иԵķҳ档ҳύۣҲԲ鿴ߵۣ', 'goto_talkback'=>'ҳ'}, 
 'jp'=>{'chset'=>"ISO-2022-JP",'TranslatedToThisLangBy'=>'Translated to Japanese by'},
 'ko'=>{'chset'=>"EUC-KR",'abstract'=>'','content'=>'','wwwresp'=>' Ʈ Ŀ  մϴ','aboutauthor'=>'۾ Ұ','auth'=>'','home'=>'ʱȭ','map'=>'','index'=>'','search'=>'ãƺ','news'=>'ҽ','archives'=>' ','links'=>'õƮ','aboutus'=>'LF Ͽ','topmap'=>'Topbar-kr.gif','botmap'=>'Bottombar-kr.gif','alttop'=>'޴','altbot'=>'޴','lftalkback'=>' 翡  ǰ ֽϴ','talkbacktext'=>'  ǵ   ֽϴ. ǵ Ͽ  ڿ ǰ ų ٸ  ǰ   ֽϴ.:','goto_talkback'=>'ǵ ','TranslatedToThisLangBy'=>'Translated to Korean by'},
 'ru'=>{'chset'=>"koi8-r",'issueindex'=>'    ','abstract'=>'','content'=>'','aboutauthor'=>' ','auth'=>'','home'=>'','map'=>'','index'=>'','search'=>'','news'=>'','archives'=>'','links'=>'', 'aboutus'=>' LF','TranslatedToThisLangBy'=>'  '},
 'tr'=>{'chset'=>"iso-8859-9",'issueindex'=>'Bu saynn ana sayfasna gider','abstract'=>'zet','content'=>'erik','wwwresp'=>'Grselyre sayfalarnn bakm, LinuxFocus Editrleri tarafndan yaplmaktadr','auth'=>' ','home'=>'Ana Sayfa','map'=>'Eriimdzeni','index'=>'indekiler','search'=>'Arama','news'=>'Duyumlar','archives'=>'Belgelikler','links'=>'Balantlar', 'topmap'=>'Topbar-tr.gif','botmap'=>'Bottombar-tr.gif','aboutus'=>'LF Nedir','aboutauthor'=>'Yazar hakknda','transinfo'=>'eviri bilgisi','lftalkback'=>'Bu yaz iin gr bildiriminde bulunabilirsiniz','talkbacktext'=>'Her yaz kendi gr bildirim sayfasna sahiptir. Bu sayfaya yorumlarnz yazabilir ve dier okuyucularn yorumlarna bakabilirsiniz.','TranslatedToThisLangBy'=>'Trke\'ye eviri'},
 'cn'=>{'chset'=>"Big-5",'TranslatedToThisLangBy'=>'Translated to Chinese by'},
 # NOTE(review): 'talkbacktext' is given twice in the 'pt' entry below. In a
 # Perl hash list the later pair wins, so the English text is never used.
 'pt'=>{'chset'=>"iso-8859-1", 'issueindex'=>'regressar ao &iacute;ndice desta edi&ccedil;&atilde;o','abstract'=>'Abstrato', 'content'=>'Conte&uacute;do', 'wwwresp'=>'P&aacute;ginas Web mantidas pelo time de Editores LinuxFocus', 'aboutauthor'=>'Sobre o autor', 'auth'=>'por', 'home'=>'In&iacute;cio', 'map'=>'Mapa', 'index'=>'&Iacute;ndice', 'search'=>'Procura', 'news'=>'Novidades', 'archives'=>'Arquivos', 'links'=>'Links', 'aboutus'=>'Sobre LF', 'transinfo'=>'Informa&ccedil;&atilde;o sobre tradu&ccedil;&atilde;o', 'lftalkback'=>'Forma de respostas para este artigo', 'talkbacktext'=>'Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:', 'talkbacktext'=>'Todo artigo tem sua pr&oacute;pria p&aacute;gina de respostas. Nesta p&aacute;gina voc&ecirc; pode enviar um coment&aacute;rio ou ver os coment&aacute;rios de outros leitores:', 'goto_talkback'=>'p&aacute;gina de respostas','TranslatedToThisLangBy'=>'Traduzido para Portugu&ecirc;s por'},
 'il'=>{'chset'=>"iso-8859-8",'ignore_chset'=>"windows-1255", 'abstract'=>'', 'aboutauthor'=> ' ', 'auth'=> '-', 'home'=> ' ', 'map'=> '', 'index'=> '', 'search'=> '', 'news'=> ' ', 'archives'=> '', 'links'=> '', 'aboutus'=> 'LF ', 'content'=>'','transinfo'=>'  ', 'lftalkback'=>'   "', 'talkbacktext'=>'     .            ', 'goto_talkback'=>' ', 'TranslatedToThisLangBy'=>'  "', 'proofread'=>' '},
 'pl'=>{'chset'=>"iso-8859-2", 'abstract'=>'Notka', 'content'=>'Zawarto', 'wwwresp'=>'Strona prowadzona przez redakcj LinuxFocus ', 'aboutauthor'=>'O Autorze', 'auth'=>' ', 'home'=>'Strona Gwna', 'map'=>'Mapa Serwisu', 'index'=>'Indeks', 'search'=>'Szukaj', 'news'=>'Nowoci', 'archives'=>'Archiwum', 'links'=>'Linki', 'aboutus'=>'O Nas', 'transinfo'=>'tumaczenie', 'lftalkback'=>'Dyskusja dotyczca tego artykuu', 'talkbacktext'=>'Komentarze do dyskusji:', 'goto_talkback'=>'Strona talkback'},
 # hindi:
 'hi'=>{'chset'=>"utf-8", 'abstract'=>'सारांश', 'content'=>'विषय वस्तु', 'wwwresp'=>'लिनक्सफ़ोकस सम्पादकगण द्वारा अनुरक्षित जालपृष्ठ', 'aboutauthor'=>'लेखक के बारे में', 'auth'=>'लेखकः', 'home'=>'मुख', 'map'=>'मानचित्र', 'index'=>'अनुक्रमणिका', 'search'=>'खोज', 'news'=>'समाचार', 'archives'=>'पुरालेख', 'links'=>'कड़ियाँ', 'aboutus'=>'लिनक्स फ़ोकस के बारे में', 'transinfo'=>'अनुवाद सम्बन्धी जानकारी', 'lftalkback'=>'इस लेख के लिये राय देने का पर्चा', 'talkbacktext'=>'हरेक लेख का अपना राय देने का पन्ना है। इस पन्ने पर आप अपनी टिप्पणी प्रकाशित कर सकते हैं या दूसरे पाठकों की टिप्पणियों को देख सकते हैं:', 'goto_talkback'=>'राय देने का पृष्ठ'},
 # serbian by  Aleksandar Milovac <amilovac@ptt.yu>
 'sr'=>{'chset'=>"UTF-8", 'abstract'=>'Резиме', 'content'=>'Садржај', 'wwwresp'=>'Web странице одржава тим уредника LF-а', 'aboutauthor'=>'О аутору', 'auth'=>'аутор', 'home'=>'Матична страница', 'issueindex'=>'назад на индекс овог броја', 'map'=>'Мапа', 'index'=>'Индекс', 'search'=>'Претрага', 'news'=>'Вести', 'archives'=>'Архиве', 'links'=>'Везе', 'aboutus'=>'О LF-у', 'transinfo'=>'Информације о превођењу', 'lftalkback'=>'Образац за коментар у вези са овим чланком', 'talkbacktext'=>'Сваки чланак има своју страницу за коментар. На овој страници можете поставити свој коментар или погледати коментаре других читаоца:', 'goto_talkback'=>'страница за коментар'},
 # farsi, (Persian) by Darioush Jalali <Darioush@math.com>:
 'ir'=>{'chset'=>"UTF-8", 'abstract'=>'خلاصه', 'content'=>'محتوا‌ نامه', 'wwwresp'=>'LinuxFocus صفحات مدیریت شده توسط سردبیران', 'aboutauthor'=>'درباره‌ی نویسنده', 'auth'=>'نوشته‌ی', 'home'=>'خانه', 'issueindex'=>'بازگشت به فهرست این شماره', 'map'=>'نقشه', 'index'=>'فهرست', 'search'=>'جستجو', 'news'=>'اخبار', 'archives'=>'آرشیو', 'links'=>'اتصالات', 'aboutus'=>'LF درباره‌ی', 'transinfo'=>'اطلاعات ترجمه', 'lftalkback'=>'صفحه‌ی نظرات این مقاله', 'talkbacktext'=>'هر مقاله صفحه‌ی نظرات خود را دارد. در این صفحه می‌توانید نظر خود را بیان کنید یا نظرات دیگران را بخوانید', 'goto_talkback'=>'صفحه‌ی نظرات'}, 
 );
#
# Languages for which the "convert to palm" download box may be offered:
my %palm = map { $_ => 1 } qw(en de fr nl pt es it tr);

# Latin-1 languages; for these htmlumlaute() is run over the article text:
my %islatin = map { $_ => 1 } qw(en de pt fr nl es it);

#
# ---- global data shared by the main script and the subs below ----
#
my $today;                      # set from today() at start-up
my $parsestate   = 0;
my $palmdownload = 0;           # true: print the palm download box
my (@parsedtypes, @parseddata);

#
# ---- pieces of the article ----
#
my ($articlename, $articlenumber, $articlecategory, $articletitle);
my ($articleauthorimg, $articleauthor);
my $articleauthorgender = "";   # appended to the 'aboutauthor' text key
my $articleauthorname;
my @articletransinfo = ();      # list of hash refs (keys: from, to, name, link, linktype)
my (@articleaboutauthor, @articleabstract, @articleindex);
my ($articleimage, $articlebody);

# License remark; replaced by ", FDL" when -f is given.
my $somerights = '<br>&quot;some rights reserved&quot; see <a href="../../license/">linuxfocus.org/license/</a>';

# Output handle, input file name and scratch variables of the reader loop.
my ($fd_out, $infile, $tmpline, $linelen, $prestate);

#
# The complete input file as one long string.
my $text;
#
# ---- command line options and ~/.lfparsercfg ----
# Options handled here: -h help, -v version, -k list categories/headings,
# -l LANG language, -p / -P force the palm download box on / off,
# -o old header style (1), -s STYLE header style, -f use ", FDL" as
# license remark. The remaining options are evaluated further down.
getopts("fvopPTCkl:hts:")||die "ERROR: No such option. -h for help.\n";
help() if ($opt_h);
if ($opt_v){
    print "lfparser version $ver\n";
    exit 0;
}
$today=today();
check_for_lfparsercfg(\%config);
#
# -k: print the valid categories (four per line) and the valid key word
# headings, then exit.
if ($opt_k){
    print "Valid categories are:\n";
    my $printed=0;
    foreach my $cat (sort keys %validcat){
        print " $cat,";
        $printed++;
        print "\n" if ($printed % 4 ==0);
    }
    print "\nValid keyword headings are:
<H4>ArticleCategory:</H4>
<H4>AuthorImage:</H4>
<H4>TranslationInfo:</H4> or <H4>AuthorName:</H4>
<H4>AboutTheAuthor:</H4>
<H4>Abstract:</H4>
<H4>ArticleIllustration:</H4>
<H4>ArticleBody:</H4>\n";

    exit(0);
}
#
# Language: built-in default, overridden by the config file, overridden by
# -l. A language is valid if it has a 'chset' entry in %intdat.
if ($config{'lang'}){
    die "ERROR: invalid language in configfile ~/.lfparsercfg\n" unless($intdat{$config{'lang'}}{'chset'});
    $lang=$config{'lang'};
}
if ($opt_l){
    die "ERROR: invalid language specifier\n" unless($intdat{$opt_l}{'chset'});
    $lang=$opt_l;
}
#
# Palm download box: -p forces it on, -P forces it off, otherwise the
# 'palm' value of the config file is used if the language supports it.
# This has to come after the language has been determined, otherwise
# $palm{$lang} would always test the built-in default language.
if ($opt_p){
    $palmdownload=1;
}elsif($opt_P){
    $palmdownload=0;
}else{
    if (defined $config{'palm'} && $palm{$lang}){
        $palmdownload=$config{'palm'};
    }
}
#
# Header style: built-in default, then the config file (only the values
# 0, 1 and 2 are accepted there), then -o (style 1), then -s.
if (defined $config{'style'}){
    foreach my $known (0,1,2){
        $style=$known if ($config{'style'} eq $known);
    }
}
$style=1 if ($opt_o);
$style=$opt_s if (defined $opt_s);

if ($opt_f){
    $somerights=", FDL";
}

# Complete the text table of the selected language. Every key known to the
# English table that has no (true) value in the selected language is filled:
#  - the special "about the author" forms (female, plural) take the
#    language's own plain 'aboutauthor' text if there is one,
#  - everything else, and the special forms of a language without
#    'aboutauthor', is copied from English.
{
    my $chosen=$intdat{$lang};
    my $english=$intdat{'en'};
    my %specialform=map { $_ => 1 } qw(aboutauthorfemale aboutauthormplural aboutauthorfplural);
    for my $key (keys %$english){
        # nothing to do if the language provides its own text
        next if ($chosen->{$key});
        if ($specialform{$key} && $chosen->{'aboutauthor'}){
            # take male form if special form is not available
            $chosen->{$key}=$chosen->{'aboutauthor'};
        }else{
            $chosen->{$key}=$english->{$key};
        }
    }
}
#
# ---- select input file and output handle ----
# The only argument is either a file name (output goes to stdout) or a bare
# article number NUM. In the latter case articleNUM.meta.shtml is read and
# articleNUM.shtml is written.
help() unless ($ARGV[0]);
$infile=$ARGV[0];
if ($opt_C){
    print STDERR "note, option -C is no longer supported\n";
}
$fd_out=IO::Handle->new;
if (! -f "$infile" && $infile=~/^(\d+)$/){
    # only a number given. The file name is articleNUM.meta.shtml
    my $num=$1;
    die "ERROR: no such file article$num.meta.shtml\n" unless(-f "article$num.meta.shtml");
    open(OUTFD,">article$num.shtml")||die "ERROR: can not write article$num.shtml\n";
    $infile="article$num.meta.shtml";
    $fd_out->fdopen(fileno(OUTFD),"w")||die "ERROR: can not write article$num.shtml\n";
    print STDERR "Language: $lang, Reading $infile .... writing article$num.shtml ...\n";
}else{
    $fd_out->fdopen(fileno(STDOUT),"w")||die "ERROR: can not write to stdout\n";
}
# The article name is the base name of the input file without "meta.".
# The directory part has to be removed first: otherwise a "meta." inside a
# directory name would be stripped instead of the one in the file name.
$articlename=$infile;
# basename:
$articlename=~s=^.*/==;
$articlename=~s/meta\.//;
# the article number is the first run of digits in the article name
if ($articlename=~/(\d+)/){
    $articlenumber=$1;
}else{
    $articlenumber=0;
}
# ---- read the input file into $text and check it on the way ----
# The explicit "< " prefix forces read mode. Without it a two-argument open
# would interpret characters such as ">" or "|" in the file name as a mode
# (truncating a file or starting a command). "-" still means stdin.
open (FF,"< $infile")||die "ERROR: can not read file $infile\n";
$text="";
# here we check that all the 7 key word headings on level h4 are available:
# $headcheck counts the h4 headings seen so far; %valhead gives the position
# at which each heading must appear (AuthorName and TranslationInfo are
# alternatives for position 3); %missingheading names the heading expected
# at a given position for the error message.
my $headcheck=0;
my %valhead=('ArticleCategory'=>1,'AuthorImage'=>2,'AuthorName'=>3,
    'TranslationInfo'=>3,'AboutTheAuthor'=>4,'Abstract'=>5,
    'ArticleIllustration'=>6,'ArticleBody'=>7);
my %missingheading=(1=>'ArticleCategory',2=>'AuthorImage',3=>'TranslationInfo',
    4=>'AboutTheAuthor',5=>'Abstract',6=>'ArticleIllustration',7=>'ArticleBody');
my $ArticleBody=0; # becomes 1 once the 7th heading has been seen
my $l=0;           # current line number
while(<FF>){
    $l++;
    chomp;
    # only the first 7 h4 headings are key word headings, later h4
    # headings belong to the article body and are not checked
    if ($headcheck < 7 && /<h4>\s*(\w+)/i){
        $headcheck++;
        $ArticleBody=1 if ($headcheck==7);
        if ($valhead{$1}){
            # it's a valid heading, but is it at the right position?
            unless($valhead{$1}==$headcheck){
                die "ERROR: before line $l, I was expecting key word heading $missingheading{$headcheck}, but I found already $1\n";
            }
        }else{
            die "ERROR: line $l, key word heading not valid. The only valid
headings are:
<H4>ArticleCategory:</H4>
<H4>AuthorImage:</H4>
<H4>TranslationInfo:</H4> or <H4>AuthorName:</H4>
<H4>AboutTheAuthor:</H4>
<H4>Abstract:</H4>
<H4>ArticleIllustration:</H4>
<H4>ArticleBody:</H4>
They must come in this order and with the exact spelling as above.
One of the headings is missing or has wrong spelling.\n";
        }
    }
    # _LF_ is a shortcut for the two-colored LinuxFocus name
    s/_LF_/Linux<font color=\"#FF0000\">Focus<\/font>/g;
    s/\s+$//g; # kill trailing space
    #
    # heuristic check for an unescaped "&": warn if the text following
    # the first "&" contains neither a named entity nor a "&#"
    if (/(&.*\W)/){
        $tmpline=$1;
        if ($tmpline!~/&\w+;/ && $tmpline!~/&#/){
            print STDERR "$infile:${l}: Warning unescaped & in \"$tmpline\" should be written as &amp;\n";
        }
    }
    # keep track of being inside <pre>...</pre>; the opening tag may carry
    # attributes such as class="code"
    if (/<pre\b[^>]*>/i){
        $prestate=1;
    }
    if (/<\/pre>/i){
        $prestate=0;
    }
    if ($prestate){
        # check line length
        $tmpline=$_;
        # count things like &amp; &uuml; &#64; as one character only:
        $tmpline=~s/&#?\w+;/x/g;
        $linelen=length($tmpline) - 81;
        # up to 82 should be ok:
        if ($linelen > 1){
            print STDERR "$infile:${l}: Warning line inside <pre> too long. This causes problems when printing the article. Try to make this line $linelen characters shorter.\n";
        }
    }
    $text.="$_\n"; # write in one long variable
}
close FF;
unless ($ArticleBody){
    die "ERROR: key word heading <H4>ArticleBody:</H4> not found\n";
}
# ---- process the text and print the article ----
# htmlumlaute() is only run for the latin1 languages; parse() and
# evalarticle() fill the global article data, printlf_format() prints it.
htmlumlaute(\$text) if ($islatin{$lang});
parse(\$text);
evalarticle();
# harden spamers life: write every "@" of the author line as an entity
$articleauthor=~s{\@}{&#64;}g;
printlf_format();

#-----
# Read the per-user configuration file ~/.lfparsercfg.
#
# Argument: a reference to a hash. Every "key = value" line of the file is
#           stored in it as $hash{key}=value. Lines starting with "#" are
#           skipped, everything after a "#" is ignored and all white space
#           is removed before the line is evaluated.
# Returns:  1 if there is no readable config file (hash left untouched),
#           0 if the file was read.
# Dies if the file is reported readable but can not be opened.
#
# The home directory is that of the effective user id. If the passwd lookup
# is not available or returns nothing, the HOME environment variable is
# used instead of probing the meaningless path "/.lfparsercfg".
sub check_for_lfparsercfg($){
    my $cfghashref=shift;
    # getpwuid may be unimplemented on some platforms, hence the eval
    my $home=eval { (getpwuid($>))[7] };
    $home=$ENV{'HOME'} unless (defined $home && length $home);
    return 1 unless (defined $home && length $home);
    my $cfgfile="$home/.lfparsercfg";
    return 1 unless( -r $cfgfile);
    # do not clobber a CFG handle or the $_ of the caller
    local *CFG;
    local $_;
    open(CFG,"< $cfgfile")||die "ERROR: can not read $cfgfile: $!\n";
    while(<CFG>){
        next if (/^\s*#/);
        s/#.*//;
        s/\s+//g;
        if (/(\w+)=(\S+)/){
            $cfghashref->{$1}=$2;
        }
    }
    close CFG;
    return 0;
}
#-----
# Take the global data and print an article in LF format
sub printlf_format(){
    my $tmp;
    my $i=0;
    my $base="";
    if ($opt_t){
        $base="<BASE href=\"http://www.linuxfocus.org/English/articles/\">";
    }
    $fd_out->print("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n");
    if ($lang eq "ar"){
        $fd_out->print("<HTML DIR=\"rtl\">\n");
    }elsif ($lang eq "ir"){
        $fd_out->print("<HTML DIR=\"rtl\">\n");
    }else{
        $fd_out->print("<HTML>\n");
    }
$fd_out->print("<HEAD>
 <META http-equiv=\"Content-Type\" content=\"text/html; charset=$intdat{$lang}{chset}\">
 <META NAME=\"GENERATOR\" CONTENT=\"lfparser_$ver\">
 <META NAME=\"LFCATEGORY\" CONTENT=\"$articlecategory\">
 <link rel=\"icon\" href=\"../../common/images/lf-16.png\" type=\"image/png\">
 <TITLE>lf$articlenumber, ${articlecategory}: $articletitle</TITLE>
<style type=\"text/css\">
<!--
 td.top {font-family: Arial,Geneva,Verdana,Helvetica,sans-serif; font-size:12 }
 pre { font-family:monospace,Courier }
 pre.code { font-family:monospace,Courier;background-color:#aedbe8; }
 p.cl { color:#EE9500 }
 table.left { margin-right:0.3cm }
 a.nodec { text-decoration:none }
 p.trans { font-size:8pt; text-align:right }
 p.clbox { width:50%; alignment:center; background-color:#FFD700; 
           border-style:none; border-width:medium; border-color:#FFD700; 
           padding:0.5cm;  text-align:center }
 p.code { width:80%; alignment:center; background-color:#aedbe8; 
          border-style:none; border-width:medium; border-color:#aedbe8; 
          padding:0.1cm;  text-align:left }
 p.foot { background-color:#AAAAAA; color:#FFFFFF; border-style:none; 
          border-width:medium; border-color:#AAAAAA; padding:0.5cm ; 
          margin-top:0.1cm; margin-right:1cm; margin-left:1cm; 
          text-align:center }
 div.tbbutton {
   background: #ddd;
   border-right: 1px solid #aaa;
   border-bottom: 1px solid #aaa;
   margin: 2px 5px 2px 5px;
   text-align: center;
   width: 20em;
   line-height: 1.2em;
   padding: 2px;
   font-size: 12px;
   white-space: nowrap;
   color: #555;
 }
 div.bbutton {
   background: #ddd;
   border-right: 1px solid #aaa;
   border-bottom: 1px solid #aaa;
   float: left;
   margin: 2px 5px 2px 5px;
   text-align: center;
   line-height: 1.2em;
   padding: 2px;
   font-size: 12px;
   white-space: nowrap;
   color: #555;
 }
-->
</style>
 $base
</HEAD>
<BODY bgcolor=\"#ffffff\" text=\"#000000\">
 <!-- this is generated html code. NEVER use this file for your
 translation work. Instead get the file with the same article number
 and .meta.shtml in its name. Translate this meta file and then
 use lfparser program to generate the final article -->
 <!-- lfparser can be obtained from http://main.linuxfocus.org/~guido/dev/lfparser.html -->

<!-- this is used by a number of tools:
 =LF=AUTHOR: $articleauthorname
 =LF=CAT___: $articlecategory
 =LF=TITLE_: $articletitle
 =LF=NUMBER: $articlenumber
 =LF=ANAME_: $articlename
 =LF=PARSER: $ver
 -->

<!-- 2pdaIgnoreStart -->
");

if ($style == 1){
    $fd_out->print("
<!-- start navegation bar, old, style=1 -->
<MAP name=\"top\">
  <AREA shape=\"rect\" coords=\"367,9,418,30\" href=\"../\">
  <AREA shape=\"rect\" coords=\"423,9,457,30\" href=\"../map.html\">
  <AREA shape=\"rect\" coords=\"463,9,508,30\" href=\"../indice.html\">
  <AREA shape=\"rect\" coords=\"514,9,558,30\" href=\"../Search/\">
</MAP>
<MAP name=\"bottom\">
  <AREA shape=\"rect\" coords=\"78,0,163,15\"  href=\"../News/\">
  <AREA shape=\"rect\" coords=\"189,0,284,15\" href=\"../Archives/\">
  <AREA shape=\"rect\" coords=\"319,0,395,15\" href=\"../Links/\">
  <AREA shape=\"rect\" coords=\"436,0,523,15\" href=\"../aboutus.html\">
</MAP>
<!-- IMAGE HEADER -->
<CENTER>
  <IMG src=\"../../common/images/$intdat{$lang}{topmap}\" width=\"600\" height=\"40\" border=\"0\" alt=\"$intdat{$lang}{alttop}\" ismap usemap=\"#top\" ><BR>
  <IMG src=\"../../common/images/$intdat{$lang}{botmap}\" width=\"600\" height=\"21\" border=\"0\" alt=\"$intdat{$lang}{altbot}\" ismap usemap=\"#bottom\">
</CENTER>
<!-- stop navegation bar -->
");
}elsif ($style == 0){
    $fd_out->print("
<!-- start navegation bar, ancient, style=0 -->
 <MAP name=\"navegator\">
 <AREA shape=\"rect\" coords=\"0, 0, 65, 28\" alt=\"$intdat{$lang}{home}\" href=\"../\">
 <AREA shape=\"rect\" coords=\"66, 0, 124, 28\" alt=\"$intdat{$lang}{index}\" href=\"../indice.html\">
 <AREA shape=\"rect\" coords=\"125, 0, 193, 28\" alt=\"$intdat{$lang}{search}\" href=\"../Search/\">
 <AREA shape=\"rect\" coords=\"194, 0, 260, 28\" alt=\"$intdat{$lang}{links}\" href=\"../Links/\">
 <AREA shape=\"rect\" coords=\"261, 0, 350, 28\" alt=\"$intdat{$lang}{aboutus}\" href=\"../aboutus.html\">
 </MAP>
 <TABLE border=\"0\" width=\"100%\" cellspacing=\"1\" cellpadding=\"0\">
  <TR align=\"center\">
    <TD><IMG src=\"../../common/images/border-short.jpg\" alt=\"[LinuxFocus Image]\" width=\"407\" height=\"88\">
    </TD>
  </TR>
  <TR align=\"center\">
    <TD bgcolor=\"#660033\"><IMG src=\"../../common/images/Navegator-bar.gif\" ismap usemap=\"#navegator\" 
     border=\"0\" width=\"351\" height=\"28\" alt=\"[Navegation Bar]\">
    </TD>
  </TR>
  <TR align=\"center\">
    <TD>     
    <center>
     <TABLE  border=\"0\" width=\"75%\" cellspacing=\"0\" cellpadding=\"5\">
       <TR bgcolor=\"#000000\">
         <TD align=\"center\">
           <A href=\"../News/\"><FONT color=\"#ffffff\"><B>$intdat{$lang}{news}</B></FONT></A>&nbsp; &nbsp;
           <A href=\"../Archives/\"><FONT  color=\"#ffffff\"><B>$intdat{$lang}{archives}</B></FONT></A>&nbsp; &nbsp;
           <A href=\"../map.html\"><FONT  color=\"#ffffff\"><B>$intdat{$lang}{map}</B></FONT></A>&nbsp; 
         </TD>
       </TR>
     </TABLE>
    </center>
    </TD>
  </TR>
 </TABLE>
<!-- stop navegation bar -->
");
}else{
    $fd_out->print("
<!-- start navegation bar, current, style=2 -->
 <!-- top navegation bar -->
 <TABLE summary=\"topbar_1\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\" align=\"center\" width=\"90%\">
   <TR bgcolor=\"#2e2292\">
     <TD class=\"top\"><TABLE summary=\"topbar_1_logo\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\" width=
       \"100%\">
         <TR><TD width=\"319\"><a href=\"../../\"><IMG src=\"../../common/images/logolftop_319x45.gif\"
           alt=\"[LinuxFocus-icon]\" width=\"319\" height=\"45\" align=\"left\" 
           border=\"0\"></a></TD>

           <TD class=\"top\">
             <TABLE summary=\"topbar_1_links\" width=\"100%\">
               <TR align=\"right\">
                 <TD class=\"top\">
                 ");
                 if ($articlenumber > 344) {
                     # new front page with article links
                     $fd_out->print(qq!
                 <A class="nodec" href="../../"><FONT color=
                 "#DDDDDD" size="2">$intdat{$lang}{up}</FONT></A> &nbsp;| 
                 <A class="nodec" href="../map.html"><FONT color=
                 "#DDDDDD" size="2">$intdat{$lang}{map}</FONT></A> &nbsp;| 
                 <A class="nodec" href="../indice.html"><FONT color=
                 "#DDDDDD" size="2">$intdat{$lang}{index}</FONT></A> &nbsp;| 
                 <A class="nodec" href="../Search/"><FONT color=
                 "#DDDDDD" size="2">$intdat{$lang}{search}</FONT></A> </TD>
                 !);
                 }else{
                     $fd_out->print(qq!
                 <A class="nodec" href="../../"><FONT color=
                 "#DDDDDD" size="2">$intdat{$lang}{up}</FONT></A> &nbsp;| 
                 <A class="nodec" href="../"><FONT color=
                 "#DDDDDD" size="2">$intdat{$lang}{home}</FONT></A> &nbsp;| 
                 <A class="nodec" href="../map.html"><FONT color=
                 "#DDDDDD" size="2">$intdat{$lang}{map}</FONT></A> &nbsp;| 
                 <A class="nodec" href="../indice.html"><FONT color=
                 "#DDDDDD" size="2">$intdat{$lang}{index}</FONT></A> &nbsp;| 
                 <A class="nodec" href="../Search/"><FONT color=
                 "#DDDDDD" size="2">$intdat{$lang}{search}</FONT></A> </TD>
                 !);
                 }
                 $fd_out->print("
               </TR>

               <TR align=\"right\">
                 <TD class=\"top\">
                   <HR width=\"100%\" noshade size=\"1\">
                 </TD>
               </TR>
             </TABLE>
           </TD>
         </TR>
       </TABLE>
     </TD>
   </TR>
 </TABLE>
 <!-- end top navegation bar -->
 <!-- blue bar -->
 <TABLE summary=\"topbar_2\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\" align=\"center\"
 width=\"90%\">
   <TR bgcolor=\"#00ffff\">
     <TD><IMG src=\"../../common/images/transpix.gif\" width=\"1\" height=
     \"2\" alt=\"\"></TD>
   </TR>
 </TABLE>
 <!-- end blue bar -->
 <!-- bottom navegation bar -->
 <TABLE summary=\"topbar_3\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\" align=\"center\"
 width=\"94%\">
   <TR bgcolor=\"#000000\">
     <TD>
       <TABLE summary=\"topbar_3_links\" cellspacing=\"0\" cellpadding=\"1\" border=\"0\" width=
       \"100%\">
         <TR align=\"center\">
           <TD WIDTH=\"20%\"><A class=\"nodec\" href=\"../News/\"><FONT color=
           \"#FFFFFF\">$intdat{$lang}{news}</FONT></A> </TD>
           <TD WIDTH=\"5%\"><FONT color=\"#FFFFFF\">|</FONT> </TD>
           <TD WIDTH=\"20%\"><A class=\"nodec\" href=\"../Archives/\"><FONT color=
           \"#FFFFFF\">$intdat{$lang}{archives}</FONT></A> </TD>
           <TD WIDTH=\"5%\"><FONT color=\"#FFFFFF\">|</FONT> </TD>
           <TD WIDTH=\"20%\"><A class=\"nodec\" href=\"../Links/\"><FONT color=
           \"#FFFFFF\">$intdat{$lang}{links}</FONT></A> </TD>
           <TD WIDTH=\"5%\"><FONT color=\"#FFFFFF\">|</FONT> </TD>
           <TD WIDTH=\"20%\"><A class=\"nodec\" href=\"../aboutus.html\"><FONT color=
           \"#FFFFFF\">$intdat{$lang}{aboutus}</FONT></A> </TD>
         </TR>
       </TABLE>
     </TD>
   </TR>
 </TABLE>
 <!-- end bottom navegation bar -->
<!-- stop navegation bar -->
");
}

$fd_out->print("
<!-- SSI_INFO -->

<!--#include virtual=\"../../dynahead.shtml\" -->

<!-- SSI_INFO STOP -->
<!-- 2pdaIgnoreStop -->

<!-- SHORT_BIO_ABOUT_THE_AUTHOR_AND_INDEX_START -->
<TABLE ALIGN=\"LEFT\" BORDER=\"0\" WIDTH=\"195\" summary=\"about the author\" class=\"left\">
<TR>
<TD>
");

if ($palmdownload){
    $fd_out->print("
<!-- 2pdaIgnoreStart -->
<!-- PALM DOC -->
<TABLE BORDER=0 hspace=4 vspace=4 summary=\"pda download\"> <TR> <TD>
<font size=1> <img src=\"../../common/images/2doc.gif\" width=34 align=left border=0 height=22 alt=\"convert to palm\"><a href=\"http://cgi.linuxfocus.org/cgi-bin/2ztxt\">Convert to GutenPalm</a><br>or <a href=\"http://cgi.linuxfocus.org/cgi-bin/2pda\">to PalmDoc</a></font>
</TD> </TR> </TABLE>
<!-- END PALM DOC -->
<!-- 2pdaIgnoreStop -->
<br>");
}
# katja is very active:
$articleauthorgender="female" if ($articleauthorname=~/katja/i && !$articleauthorgender);
#
$fd_out->print("
<$articleauthorimg>
<BR>$intdat{$lang}{auth}  $articleauthor
<BR><BR>\n");
if (@articleaboutauthor){
    $tmp=join("", @articleaboutauthor);
    if (length($tmp) > 10){
        $fd_out->print("<I>".$intdat{$lang}{'aboutauthor'.$articleauthorgender}.":</I><BR>\n");
        $fd_out->print("<!-- aboutauthor_start -->\n");
        $fd_out->print(join "", @articleaboutauthor);
        $fd_out->print("\n<!-- aboutauthor_stop -->\n");
    }
}
#
my $Translatedto_printed=0;
my $proofread="";
$fd_out->print("<!-- TRANSLATED_TO $lang -->\n");
for $tmp (@articletransinfo){
    if ($tmp->{'to'} eq $lang && $tmp->{'from'} ne 'orig'){
        if ($tmp->{'from'} eq $lang){
            next if ($Translatedto_printed==0);
            $proofread= " [".$intdat{$lang}{'proofread'}."]";
        }else{
            $proofread= "";
        }
        $fd_out->print("<BR><BR><I>".$intdat{$lang}{'TranslatedToThisLangBy'}.":</I><BR>\n") unless($Translatedto_printed);
        $Translatedto_printed=1; # there may be a 'en to en' for proof reading 
        if ($tmp->{'linktype'} eq 'email'){
            $fd_out->print($tmp->{'name'} . "$proofread <small>&lt;".$tmp->{'link'}."&gt;</small>\n");
        }else{
            $fd_out->print($tmp->{'name'} . "$proofread (<a href=\"".$tmp->{'link'}."\"><font size=\"1\">homepage</font></a>)\n");
        }
        $fd_out->print("<br>\n");
        $fd_out->print("<!--\n =LF=TRANSTO=".$lang.": ".$tmp->{'name'}."\n-->\n"); 
    }
}
$fd_out->print("<!-- TRANSLATED_TO_STOP -->\n");
$fd_out->print("<!-- INDEX_START -->\n");
if (@articleindex){
    $fd_out->print("<BR><i>$intdat{$lang}{content}</i>:\n<UL>\n");
    for $tmp (@articleindex){
        $fd_out->print("  <LI><A HREF=\"#${articlenumber}lfindex$i\">$tmp</A></LI>\n");
        $i++;
    }
    $fd_out->print("  <LI><A HREF=\"$lftalkback?anum=$articlenumber\">".$intdat{$lang}{'lftalkback'}."</A></LI>\n") if (!$opt_T && $articlenumber > 100);
    $fd_out->print("</UL>\n");
}else{
    print STDERR "Warning: could not generate an article index\n";
}
$fd_out->print("\n</TD></TR></TABLE>\n");
$fd_out->print("<!-- INDEX_STOP -->\n");
$fd_out->print("<!-- SHORT_BIO_ABOUT_THE_AUTHOR_AND_INDEX_STOP -->\n");
$fd_out->print("<!-- HEAD_OF_THE_ARTICLE_START -->\n");
$fd_out->print("<br>&nbsp;\n"); # needed due to a bug in netscape
$fd_out->print("<table border=\"0\"><tr><td>\n");
$fd_out->print("<H2>$articletitle</H2>\n $articleimage");
$fd_out->print("\n<!-- ABSTRACT OF THE ARTICLE -->\n");
$fd_out->print("<P><i>$intdat{$lang}{abstract}</i>:\n<P>\n");
$fd_out->print("<!-- articleabstract_start -->\n");
$tmp= join "", @articleabstract;
$fd_out->print($tmp);
$fd_out->print("\n<!-- articleabstract_stop -->\n");
# new blue bar:
$fd_out->print("\n<br><!-- HR divider --><center><font color=\"#8282e0\"><b>_________________ _________________ _________________</b></font></center><br>\n");
$fd_out->print("</td></tr></table>\n");
$fd_out->print("<!-- HEAD_OF_THE_ARTICLE_STOP -->\n");
$fd_out->print("<!-- BODY_OF_THE_ARTICLE_START -->\n");
$fd_out->print("$articlebody\n");
$fd_out->print("<!-- BODY_OF_THE_ARTICLE_STOP -->\n");
$fd_out->print("<!-- 2pdaIgnoreStart -->\n");
$fd_out->print(qq!<A NAME="talkback">&nbsp;</a>
<h2>$intdat{$lang}{lftalkback}</h2>
$intdat{$lang}{talkbacktext}
<center>
    <table width="250" border=0><tr><td>
    <div class="tbbutton"><A class="nodec" href="$lftalkback?anum=$articlenumber">&nbsp;$intdat{$lang}{goto_talkback}&nbsp;</a></div>
    </td></tr></table>
</center>
\n!) if (!$opt_T && $articlenumber > 100);
$fd_out->print("<br clear=\"all\">\n<HR size=\"2\" noshade>\n");
# we need a table for netscape communicator compatibility
$fd_out->print("<table width=\"250\" border=0><tr><td>\n");
$fd_out->print("<div class=\"bbutton\"><a class=\"nodec\" href=\"../../\">&lt;--, LF ".$intdat{$lang}{'home'}."</a></div>\n");
$fd_out->print("</td><td>\n");
$fd_out->print("<div class=\"bbutton\"><a class=\"nodec\" href=\"./\">".$intdat{$lang}{'issueindex'}."</a></div>\n");
$fd_out->print("</td></tr></table>\n");
$fd_out->print("<br clear=\"all\">\n<HR size=\"2\" noshade>\n");
$fd_out->print("<!-- ARTICLE FOOT -->
<CENTER><TABLE WIDTH=\"98%\" summary=\"footer\">
<TR><TD ALIGN=CENTER BGCOLOR=\"#bdc6d5\" WIDTH=\"50%\">
<A HREF=\"../../common/lfteam.html\">$intdat{$lang}{wwwresp}</A>
<BR><FONT COLOR=\"#1111aa\"><a href=\"../../common/copy.html\">&copy; $articleauthorname</a>$somerights<br><a href=\"http://www.linuxfocus.org\">http://www.LinuxFocus.org</a></FONT>
");
$fd_out->print("</TD>\n");

if (scalar(@articletransinfo)>0){ # set to 1 to show only a list if there is at least one translator
    $fd_out->print("<TD BGCOLOR=\"#bdc6d5\">\n<!-- TRANSLATION INFO -->\n");
    $fd_out->print("<font size=2>$intdat{$lang}{transinfo}:</font>\n<TABLE summary=\"translators\">\n");
    for $tmp (@articletransinfo){
        if ($tmp->{'from'} eq 'orig'){
            $fd_out->print("  <tr><td><font size=\"2\">");
            $fd_out->print($tmp->{'to'}." --&gt; -- : ");
            if ($tmp->{'linktype'} eq 'email'){
                $fd_out->print($tmp->{'name'} . " <small>&lt;".$tmp->{'link'}."&gt;</small></font></td></tr>\n");
            }else{
                $fd_out->print($tmp->{'name'} . " (<a href=\"".$tmp->{'link'}."\"><font size=\"1\">homepage</font></a>)</font></td></tr>\n");
            }
            next;
        }
        $fd_out->print("  <tr><td><font size=\"2\">");
        $fd_out->print($tmp->{'from'}." --&gt; ".$tmp->{'to'}.": ");
        if ($tmp->{'linktype'} eq 'email'){
            $fd_out->print($tmp->{'name'} . " &lt;".$tmp->{'link'}."&gt;</font></td></tr>\n");
        }else{
            $fd_out->print($tmp->{'name'} . " (<a href=\"".$tmp->{'link'}."\"><font size=\"1\">homepage</font></a>)</font></td></tr>\n");
        }
    }
    $fd_out->print("</TABLE>\n</TD>\n");
}else{
    $fd_out->print("<TD BGCOLOR=\"#bdc6d5\">&nbsp;");
    $fd_out->print("\n<!-- OLD FORMAT, NO TRANSLATION INFO -->\n");
    $fd_out->print("</TD>\n");
}
$fd_out->print("</TR></TABLE></CENTER>\n");
$fd_out->print("<p><font size=1>$today, generated by lfparser version $ver</font></p>\n");
$fd_out->print("<!-- 2pdaIgnoreStop -->\n");
$fd_out->print("</BODY>\n</HTML>\n");
#
$fd_out->flush;
}

#-----
# handle the parsed text chunks.
sub evalarticle{
    # Walk the token stream in @parsedtypes/@parseddata (parallel arrays:
    # token type and token content) and fill the file-level article
    # variables. $parsestate selects what is expected next:
    #    0  H1 with the article title
    #    1  H4 "ArticleCategory"            2  category text
    #    3  H4 (author image heading)       4  <img> of the author
    #    5  H4 "AuthorName" (->6, old format) or "TranslationInfo" (->7)
    #    6  anchor with the author (->8)    7  TranslationInfo entries (->9)
    #    8  H4 "AboutTheAuthor"             9  about-the-author html
    #   10  abstract html                  11  <img> article illustration
    #   12  article body
    # Dies with an ERROR message on any deviation from that order and exits
    # with status 1 if the category is not a key of %validcat.
    # NOTE(review): assumes $parsestate is 0 on entry; it is initialised
    # outside this sub -- confirm.
    my $i=0;
    # sub-state inside TranslationInfo (parsestate 7):
    #   0 want "original in LANG", 1 want the anchor of the original author,
    #   even (>=2) want "LANG1 to LANG2" or the next heading,
    #   odd (>=3) want the anchor of the translator
    my $transinfostate=0;
    my ($link,$linktype,$name,$transinfolang1,$transinfolang2);
    # states in which we ignore <P> <BR> </P>
    my %ignorePandBR=(1=>1,2=>1,3=>1,4=>1,5=>1,6=>1,7=>1,8=>1,11=>1,12=>1);
    for my $type (@parsedtypes){
        # remove empty text and &nbsp; which is inserted by WYSIWYG editors
        $parseddata[$i]=~ s/\&nbsp\;//g if ($type eq "Text");
        # Skip text that is empty or consists only of white space. A pattern
        # is used instead of a plain truth test because a truth test would
        # also throw away a text chunk that is exactly "0".
        if ($type eq "Text" && $parseddata[$i]=~ /^[\r\n\t ]*$/){
            $i++; next;
        }
        # dbg, debug:
        #print "-- $parsestate: $parseddata[$i] type: $type --\n";
        # start of article, search for heading:
        if ($parsestate==0 && $type=~/HeadingLevelTag/){
            if ($type eq "HeadingLevelTag1"){
                $articletitle=$parseddata[$i];
                $articletitle=~s/\s+/ /g;
                $parsestate++;
            }else{
                die "ERROR: The first heading must be the title of the article on level 1. Note: you may not have \"_LF_\" or nested tags in the title.\n";
            }
            $i++; next;
        }
        # ignoring of <P>, <BR>, </P> in certain states:
        if ($ignorePandBR{$parsestate}){
            if ($type eq "StartTag" && $parseddata[$i] =~/^P$/i){ $i++; next;}
            if ($type eq "StartTag" && $parseddata[$i] =~/^br$/i){ $i++; next;}
            if ($type eq "EndTag" && $parseddata[$i] =~/^\/P$/i){ $i++; next;}
        }
        # start of article, search for ArticleCategory:
        if ($parsestate==1){
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleCategory/){
                $parsestate++;
            }else{
                die "ERROR: The second heading must be ArticleCategory on level 4\n";
            }
            $i++; next;
        }
        #--
        # looking for the category
        if ($parsestate==2){
            if ($type eq "Text"){
                $articlecategory=$parseddata[$i];
                $articlecategory=~s/\s+//g;
                $parsestate++;
            }else{
                die "ERROR: The heading ArticleCategory must be followed by a text plain string without tags\n";
            }
            $i++; next;
        }
        #--
        # looking for the image heading (only the heading level is checked,
        # not its text)
        if ($parsestate==3){
            if ($type eq "HeadingLevelTag4"){
                $parsestate++;
            }else{
                die "ERROR: The 3-rd heading must be AuthorImage after ArticleCategory description\n";
            }
            $i++; next;
        }
        #--
        # looking for the image
        if ($parsestate==4){
            if ($type eq "StartTag" && $parseddata[$i]=~/img/i){
                $parsestate++;
                $articleauthorimg=$parseddata[$i];
            }else{
                die "ERROR: Image of author missing after AuthorImage heading\n";
            }
            $i++; next;
        }
        #--
        # looking for the AuthorName
        if ($parsestate==5){
            # the old format is AuthorName the new is TranslationInfo
            # and they are mutual exclusive
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AuthorName/){
                $parsestate=6;
            }elsif ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/TranslationInfo/){
                $parsestate=7;
            }else{
                die "ERROR: AuthorName or TranslationInfo must be the heading after the Image, I found however: \"$parseddata[$i]\", tagtype=$type\n";
            }
            $i++; next;
        }
        #--
        # looking for the name and e-mail or home-page (old format)
        if ($parsestate==6){
            if ($type eq "AnchorTag"){
                $articleauthor="<" . $parseddata[$i] . ">";
                if ($parseddata[$i]=~/\" *>(.+?)<\//){
                    $articleauthorname=$1;
                }else{
                    die "ERROR: in <$parseddata[$i]>, could not extract e-mail or home-page\n";
                }
                $parsestate=8;
            }else{
                die "ERROR: AuthorName must followed by an anchor tag\n";
            }
            $i++; next;
        }
        #--
        # looking for the name and e-mail or home-page
        # parse the TranslationInfo pre-tag:
        if ($parsestate==7){
            if ($transinfostate == 0){
                if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){
                    $transinfostate++;
                    die "ERROR: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'});
                    # the original author is stored as a "translation"
                    # from the pseudo language 'orig'
                    $transinfolang1='orig';
                    $transinfolang2=$1;
                    $i++; next;
                }else{
                    die "ERROR1: in $parseddata[$i]: TranslationInfo must be followed by pargraph that looks like: <p>original in LANG <a href=\"nospam:....\">Author Name</a></p> or <p>original in LANG <a href=\"mailto:...\">Author Name</a></p>\n";
                }
            }else{
                if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){
                    die "ERROR1a: there must be only one original author under TranslationInfo\n";
                }
            }
            if ($transinfostate == 1){
                # this is still the original author but this time the A HREF=...
                # the tag can look like this:
                # a href="mailto:katja@linuxfocus.org" gender="female"
                # a href="mailto:katja@linuxfocus.org" gender="mplural"
                # a href="mailto:katja@linuxfocus.org" gender="fplural"
                if ($type eq "AnchorTag"){
                    $parseddata[$i]=~s/[\n\r\t]/ /g;
                    if ($parseddata[$i]=~/gender/i){
                        if ($parseddata[$i]=~/female/){
                            $articleauthorgender="female";
                        }elsif($parseddata[$i]=~/fplural/){
                            $articleauthorgender="fplural";
                        }elsif($parseddata[$i]=~/mplural/){
                            $articleauthorgender="mplural";
                        }
                    }
                    # the gender attribute is not valid html, drop it
                    $parseddata[$i]=~s/gender *= *"?\w+"?//gi;
                    #$articleauthor="<" . $parseddata[$i] . ">";
                    $transinfostate++;
                    if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){
                        # read the captures right here, inside the branch
                        # that matched
                        $link=$1; # could in this case as well be a homepage
                        $name=$2;
                        $articleauthorname=$2;
                    }else{
                        die "ERROR2: in <$parseddata[$i]>, can not extract name\n";
                    }
                    $name=~s/\s+/ /g;
                    $link=~s/\s+//g;
                    if ($link=~/(nospam|mailto):/i){
                        $linktype="email";
                        # strip the prefix with the same case-insensitivity
                        # that was used to detect it
                        $link=~s/(nospam|mailto)://gi;
                        $link=~s/\@/\/at\//g; # could be several authors
                        $link=~s/\s//g;
                        $link=~s/,/ /g; # to allow line breaks for long lines
                        # handle %28at%29 :
                        $link=~s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
                        $articleauthor="$name <br> <small>&lt;$link&gt;</small>";
                    }else{
                        $linktype="homepage";
                        $articleauthor="$name <a href=\"$link\"><font size=\"1\">(homepage)</font></a>";
                    }
                    push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype});
                    $i++; next;
                }else{
                    die "ERROR3: TranslationInfo must be followed by pargraph that looks like: <p>original in LANG <a href=\"nospam:email(at)domain.com\">Author Name</a></p>\n";
                }
            }
            if (($transinfostate % 2) == 0){
                # this is the "lang to lang" or already the AboutTheAuthor
                if($type eq "Text" && $parseddata[$i]=~/(\w+) +to +(\w+)/i){
                    $transinfostate++;
                    die "ERROR4: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'});
                    $transinfolang1=$1;
                    $transinfolang2=$2;
                    $i++; next;
                # looking for the AboutTheAuthor
                }elsif ($type eq "HeadingLevelTag4"){
                    # here we look also for the next heading:
                    if ($parseddata[$i]=~/AboutTheAuthor/){
                        $parsestate=9;
                        die "ERROR7: TranslationInfo not complete\n" unless(scalar(@articletransinfo) > 0);
                    }else{
                        die "ERROR8: The heading after TranslationInfo must be AboutTheAuthor and not \"$parseddata[$i]\"\n";
                    }
                    $i++; next;
                }else{
                    die "ERROR5: in $parseddata[$i]: TranslationInfo must have a pargraph that looks like: <p>LANG1 to LANG2<a href=\"nospam:your(at)email.domain\">Translator Name</a></p>\nAdditional &nbsp; and other things are not allowed\n";
                }
            }
            if (($transinfostate % 2) == 1){
                if ($type eq "AnchorTag"){
                    $transinfostate++;
                    $parseddata[$i]=~s/[\r\n]/ /g;
                    $parseddata[$i]=~s/gender *= *"?\w+"?//gi;
                    if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){
                        $link=$1; # could in this case as well be a homepage
                        $name=$2;
                        $name=~s/\s+/ /g;
                        $link=~s/\s+//g;
                        # case-insensitive like for the original author, so
                        # that e.g. "Mailto:" is obfuscated as well
                        if ($link=~/(nospam|mailto):/i){
                            $linktype="email";
                            $link=~s/mailto://gi;
                            $link=~s/nospam://gi;
                            $link=~s/\@/\/at\//g; # could be several authors
                            $link=~s/\s//g;
                            $link=~s/,/, /g; # could be several authors
                        }else{
                            $linktype="homepage";
                        }
                        push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype});
                    }else{
                        die "ERROR2:  TranslationInfo ($parseddata[$i]): could not get name\n";
                    }
                    $i++; next;
                }else{
                    die "ERROR6: TranslationInfo must have a pargraph that looks like: <p>LANG1 to LANG2<a href=\"nospam:email(at)somewhere.com\">Translator Name</a></p>\n";
                }
            }
            # every branch above ends in next or die; kept as a safety net
            $i++; next;
        }
        #--
        # looking for the AboutTheAuthor when there is no TranslationInfo
        if ($parsestate==8){
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AboutTheAuthor/){
                $parsestate++;
            }else{
                die "ERROR: The heading after AuthorName must be AboutTheAuthor and not \"$parseddata[$i]\"\n";
            }
            $i++; next;
        }
        #--
        # reading about the author (html text without heading)
        if ($parsestate==9){
            if ($type=~/HeadingLe/){
                if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/Abstract/){
                    $parsestate++;
                }else{
                    die "ERROR: The heading after the \"about the author\" paragraph must be the Abstract and not \"$parseddata[$i]\"\n";
                }
            }else{
                # reading any html (tags get their angle brackets back):
                if ($type=~/Tag/){
                    push(@articleaboutauthor,"<" . $parseddata[$i] . ">");
                }elsif ($type eq "Text"){
                    push(@articleaboutauthor,$parseddata[$i]);
                }else{
                    die "Programm error, unknown type $type in about author\n";
                }
            }
            $i++; next;
        }
        #--
        # reading the abstract (html text without heading)
        if ($parsestate==10){
            if ($type=~/HeadingLe/){
                if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleIllustration/){
                    $parsestate++;
                }else{
                    die "ERROR: The heading after the abstract paragraph must be ArticleIllustration but I found $type:\"$parseddata[$i]\"\n";
                }
            }else{
                # reading any html (tags get their angle brackets back):
                if ($type=~/Tag/){
                    push(@articleabstract,"<" . $parseddata[$i] . ">");
                }elsif ($type eq "Text"){
                    push(@articleabstract,$parseddata[$i]);
                }else{
                    die "Programm error, unknown type $type in abstract\n";
                }
            }
            $i++; next;
        }
        #--
        # looking for the article illustration
        if ($parsestate==11){
            if ($type eq "StartTag" && $parseddata[$i]=~/img/i){
                $parsestate++;
                $articleimage="<" . $parseddata[$i] . ">";
            }else{
                die "ERROR: Image of article missing after ArticleIllustration heading\n";
            }
            $i++; next;
        }
        #--
        # looking for the ArticleBody is already checked in the parser:
        if ($parsestate==12){
            if ($type eq "Body"){
                $articlebody=$parseddata[$i];
                # remove index anchors of the form parsebodyforindex()
                # generates; presumably left over from an earlier run
                $articlebody=~s|<A NAME="${articlenumber}lfindex\d+">&nbsp;</A>||g;
            }else{
                die "Program error: state 12 but tag-type $type instead of ArticleBody\n";
            }
            $i++; next;
        }
        #--
        $i++;
    }
    die "ERROR: invalid article meta-format, debug state $parsestate. Either you do not have a <H1> at the beginning or there is still a bug in lfparser.\n" unless ($parsestate == 12);
    parsebodyforindex(\$articlebody);
    unless ($validcat{$articlecategory}){
        print STDERR "ERROR invalid article category $articlecategory\n";
        print STDERR "valid categories are:\n";
        foreach (keys %validcat){
            print STDERR " - \"$_\"\n";
        }
        exit 1;
    }
}
#-----
# generate an index for the file.
# parse the html file body and store the H2 H3 text strings in @articleindex
# parse takes a ref to a text string as argument.
sub parsebodyforindex($){
    # Argument: reference to the html text of the article body.
    # Effects on file-level variables:
    #   - every <H2>/<H3> heading gets an anchor named
    #     "<articlenumber>lfindex<n>" in front of it,
    #   - the heading texts (with tags removed) are appended to @articleindex,
    #   - closing </body> and </html> tags are dropped,
    #   - the rebuilt text is stored in $articlebody (not written back
    #     through the reference).
    my $text = shift;
    my @body;
    my $h;
    my $i=0;
    # start scanning at the beginning, independent of earlier //g matches
    pos($$text)=0;
    while (1) {
        # First we try to pull off any plain text (anything before a "<" char)
        if ($$text =~ /\G([^<]+)/gcs) {
            push(@body,$1);
        } elsif ($$text =~ /\G<\/HTML>/igcs) {
            next;
        } elsif ($$text =~ /\G<\/body>/igcs) {
            next;
        } elsif ($$text =~ /\G<[hH]([23])>(.+?)<\/[hH]\1>/gcs) {
            # H2 and H3 (without attributes) become index entries
            my $level=$1;
            $h=$2;
            push(@body,"<A NAME=\"${articlenumber}lfindex$i\">&nbsp;</A>\n<H$level>".$h ."</H$level>\n");
            push(@articleindex,$h);
            $i++;
        } elsif ($$text =~ m|\G(<[^>]*>)|gcs) {
            push(@body,$1);
        } else {
            # the string is exhausted, or there's no > in it.
            last;
        }
    }
    # If the scan stopped at a "<" that is never closed, keep the remaining
    # text instead of silently dropping it from the article.
    my $rest=substr($$text,pos($$text) || 0);
    if (length($rest)){
        print STDERR "Warning: unterminated tag at the end of the article body\n";
        push(@body,$rest);
    }
    # index entries are plain text: remove tags, also those spanning lines
    foreach $h (@articleindex){
        $h=~s/<.+?>//gs;
    }
    $articlebody=join "",@body;
}
#-----
# parse the html file and store the result in @parseddata, @parsedtypes.
# parse takes a ref to a text string as argument.
sub parse($){
    # Argument: reference to the text of the meta html file.
    # Splits the text into tokens and appends them to the file-level
    # parallel arrays @parseddata (content) and @parsedtypes (type). Types:
    #   Text, Markup (<!...>), EndTag, AnchorTag (a complete <a ..>..</a>),
    #   HeadingLevelTag<n> (a complete <hn>..</hn> without nested tags),
    #   StartTag, and Body (everything after the H4 "ArticleBody" heading).
    # Tag tokens are stored without the enclosing < and >.
    # Comments are dropped. Tokenizing stops at the Body token, at the end
    # of the text or at a "<" that has no matching ">".
    my $text = shift;
    my $type;
    my $content;
    while (1) {
        # First we try to pull off any plain text (anything before a "<" char)
        if ($$text =~ /\G([^<]+)/gcs) {
            $content = $1; $type = 'Text';
        } elsif ($$text =~ /\G<(!--.*?--)>/gcs) {
            # we ignore comments except if they are in the article body:
            next;
            #$type = 'Comment';
            #$content = $1;
        } elsif ($$text =~ /\G<(!.*?)>/gcs) {
            $type = 'Markup';
            $content = $1;
        # Then, look for an end tag
        } elsif ($$text =~ m|\G<(/[a-zA-Z][^<]*?)>|gcs) {
            $content = $1; $type = 'EndTag';
        # Look for a <a ..> ..</a> tag:
        } elsif ($$text =~ /\G<([aA] [^>]+>([^<]+)<\/[aA])>/gcs) {
            $content = $1; $type = "AnchorTag";
        # Look for a h[0-9] tag:
        } elsif ($$text =~ /\G<[hH](\d)>([^<]+)<\/[hH]\d>/gcs) {
            $content = $2; $type = "HeadingLevelTag$1";
            if ($1 eq "4" && index($content,"ArticleBody")> -1){
                # The rest of the file, right after the heading, is the
                # body. pos() is the end of the heading match; this avoids
                # $' which slows down every regex of the program on older
                # perl versions.
                $content=substr($$text,pos($$text));
                $type="Body";
                push(@parseddata,$content);
                push(@parsedtypes,$type);
                last;
            }
        # Then, finally we look for a start tag
        # We know the first char is <, make sure there's a >
        } elsif ($$text =~ /\G<(.+?)>/gcs) {
            $content = $1; $type = 'StartTag';
        } else {
            # the string is exhausted, or there's no > in it.
            last;
        }
        #print "dbg $content type: $type\n";
        push(@parseddata,$content);
        push(@parsedtypes,$type);
    }
}
#--------------
sub htmlumlaute($){
    # Argument: reference to a text string; the string is changed in place.
    # Replaces the ISO-8859-1 (Latin-1) characters listed below by their
    # html character entities, e.g. u-umlaut (0xFC) becomes &uuml;
    # All other characters are left untouched.
    # The characters are written as \xNN escapes and not as literal 8-bit
    # characters: a literal gets lost as soon as the source file is
    # re-encoded, which leaves an empty pattern, and an empty pattern in
    # s/// means "reuse the last successful pattern".
    my $txt_ptr=shift;
    my %entity_for=(
        "\xA1"=>'iexcl', "\xBF"=>'iquest',
        "\xC0"=>'Agrave',"\xC1"=>'Aacute',"\xC2"=>'Acirc', "\xC3"=>'Atilde',
        "\xC4"=>'Auml',  "\xC5"=>'Aring', "\xC7"=>'Ccedil',
        "\xC8"=>'Egrave',"\xC9"=>'Eacute',"\xCA"=>'Ecirc', "\xCB"=>'Euml',
        "\xCC"=>'Igrave',"\xCD"=>'Iacute',"\xCE"=>'Icirc', "\xCF"=>'Iuml',
        "\xD1"=>'Ntilde',
        "\xD2"=>'Ograve',"\xD3"=>'Oacute',"\xD4"=>'Ocirc', "\xD5"=>'Otilde',
        "\xD6"=>'Ouml',  "\xD8"=>'Oslash',
        "\xD9"=>'Ugrave',"\xDA"=>'Uacute',"\xDB"=>'Ucirc', "\xDC"=>'Uuml',
        "\xDD"=>'Yacute',"\xDF"=>'szlig',
        "\xE0"=>'agrave',"\xE1"=>'aacute',"\xE2"=>'acirc', "\xE3"=>'atilde',
        "\xE4"=>'auml',  "\xE5"=>'aring', "\xE6"=>'aelig', "\xE7"=>'ccedil',
        "\xE8"=>'egrave',"\xE9"=>'eacute',"\xEA"=>'ecirc', "\xEB"=>'euml',
        "\xEC"=>'igrave',"\xED"=>'iacute',"\xEE"=>'icirc',
        "\xF1"=>'ntilde',
        "\xF2"=>'ograve',"\xF3"=>'oacute',"\xF4"=>'ocirc', "\xF6"=>'ouml',
        "\xF9"=>'ugrave',"\xFA"=>'uacute',"\xFB"=>'ucirc', "\xFC"=>'uuml',
    );
    # one pass over the text; characters in the range without a table
    # entry are put back unchanged
    $$txt_ptr=~s/([\xA1-\xFF])/exists($entity_for{$1}) ? "&".$entity_for{$1}.";" : $1/ge;
}
#--------------
sub today(){
    # Current local date as a "yyyy-mm-dd" string.
    # localtime counts years from 1900 and months from 0.
    my ($mday,$month,$year)=(localtime)[3,4,5];
    return sprintf("%04d-%02d-%02d",1900 + $year,$month + 1,$mday);
}
#-----
#
sub help(){
    # Print the usage text, ending with the version from $ver, to the
    # currently selected output handle and terminate the program.
    print <<"END_OF_HELP";
lfparser -- parse a LinuxFocus article in HTML meta syntax and
generate a final LinuxFocus article. The HTML meta syntax is described
in http://main.linuxfocus.org/~guido/dev/lfparser.html
It is a special HTML format that can easily be edited and converted to
the released article format. It gives LinuxFocus the flexibilty to change
the layout without editing all articles.

USAGE: lfparser [-hktoPTv][-s style][-l ar|cn|de|en|es|fr|gb|il|id|ir|hi|jp|ko|nl|pt|pl|ru|sr|it|tr] articleX.meta.shtml > articleX.shtml 
or
USAGE: lfparser [-hktoPTv][-s style][-l ar|cn|de|en|es|fr|gb|il|id|ir|hi|jp|ko|nl|pt|pl|ru|sr|it|tr] num

OPTIONS: -h this help
         -f Add a license note at the end which is compatible with the FDL
            (old gnu license)
         -l select a language for the output [config file: lang=xx]
         -k list all valid categories, and H4 headings and exit
         -o use old style header [config file: style=1], obsolated by -s
         -s set the header style, 2 new style, 1 old style, 0 ancient style
            [config file: style=0, style=1 or style=2]
         -P do not insert palm download even if enabled in config
         -p do insert palm download [config file: palm=1]
            Note: the palm download works only for certain languages.
         -T do not include talkback
         -t test mode. This inserts a <BASE href=..> into the
            article to include the images and other stuff from 
            ../../common/ without the need to have them locally available.
            This option must not be used for the final article.
         -v print version and exit.

The section AboutTheAuthor: can be empty for backward compatibility
with older articles.

If you do not specify a filename as argument but just a number
then lfparser will seatch for a file called article<num>.meta.shtml
in the current directory and write to article<num>.shtml
This is a shortcut to save some typing.

EXAMPLE: French: 
         lfparser -l fr article111.meta.shtml > article111.shtml
         or as shortcut:
         lfparser -l fr 111

         Arabic: 
         lfparser -l ar articleX.meta.shtml > articleX.shtml

You can have an optional ~/.lfparsercfg file with the following
syntax:
# comment
lang = de # make German the default language
style = 2 # new style, 1 would be old style, 0=ancient style
#

This will then set the configuration options described under
OPTIONS and you can run lfparser without specifying any options:
lfparser articleX.meta.shtml > articleX.shtml
This is lfparser version: $ver
END_OF_HELP

    exit;
}
__END__ 


