#GPL
#GPL  libwhisker copyright 2000,2001,2002 by rfp.labs
#GPL
#GPL  This program is free software; you can redistribute it and/or
#GPL  modify it under the terms of the GNU General Public License
#GPL  as published by the Free Software Foundation; either version 2
#GPL  of the License, or (at your option) any later version.
#GPL
#GPL  This program is distributed in the hope that it will be useful,
#GPL  but WITHOUT ANY WARRANTY; without even the implied warranty of
#GPL  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GPL  GNU General Public License for more details.
#GPL

=pod

=head1 ++ Sub package: html
        
The html sub package implements a simple HTML parser.

=cut

################################################################

=pod

=head1 - Function: LW::html_find_tags
  
Params: \$data, \&callback_function
Return: nothing

LW::html_find_tags parses a piece of HTML and 'extracts' all found tags,
passing the info the given callback function.  The callback function must
accept two parameters: the current tag (as a scalar), and a hash ref of
all the tag's elements. For example, the tag <a href="/file"> will pass
'a' as the current tag, and a hash reference which contains
{'href'}="/file".

Notice: this function is slow! And using it to rewrite (via passback) is 
slower!  Make sure you have LW::bin installed to get the faster binary 
version.

=cut

sub html_find_tags {
 # use faster binary helper
 goto &LW::bin::html_find_tags 
 	if(defined $LW::available{'lw::bin'});
	
 my ($dataref, $callbackfunc)=@_;

 return if(!(defined $dataref      && ref($dataref)     ));
 return if(!(defined $callbackfunc && ref($callbackfunc)));

 my ($CURTAG, $ELEMENT, $VALUE, $c, $cc);
 my ($INCOMMENT,$INTAG,$INSCRIPT)=(0,0,0);
 my (%TAG, $ret, $start, $tagstart, $commstart, $scriptstart, $x);

 # YES, this looks like C.  In fact, it's my C version ported to
 # perl.  But it's faster and more dependable than any regex mess
 # someone could come up with.
 my $LEN = length($$dataref);
 for ($c=0; $c<$LEN; $c++){

	$cc=substr($$dataref,$c,1);

	if(!$INCOMMENT && !$INTAG && !$INSCRIPT && $cc ne '>' && $cc ne '<'){
		next; }

        if($cc eq '<'){
		if($INSCRIPT){
			if(lc(substr($$dataref,$c+1,7)) eq '/script'){
				$INSCRIPT=0;
				$TAG{'='}=substr($$dataref, $scriptstart,
					$c - $scriptstart - 1);
			} else { next; }
		}

                if(substr($$dataref,$c+1,3) eq '!--'){
                        $INCOMMENT=1; $commstart=$c; $c+=3;

		} else {
    	                $INTAG=1; $c++;
			$c++ while(substr($$dataref,$c,1)=~/[< \t\r\n]/);
			$tagstart=$c-1; 

			$CURTAG='';
			while(($x=substr($$dataref,$c,1))!~/[ \t\r\n>=]/ &&
					$c < $LEN){
				$CURTAG.=$x; $c++;}

			$c++ if($x ne '>');

			$INSCRIPT=1 if($CURTAG eq 'script');
		}	
		$cc=substr($$dataref,$c,1); # refresh current char (cc)
	}

        if($cc eq '>'){
		if($INSCRIPT){
			if($CURTAG eq 'script'){
				$scriptstart = $c + 1; 
			} else { next; }
		}
		if(!$INCOMMENT && $INTAG){ 
			$INTAG=0; 
			$ret=&$callbackfunc($CURTAG,\%TAG, $dataref,
				$tagstart, $c-$tagstart+1);
			if(defined $ret && $ret != 0){
				$c+=$ret;}
			$CURTAG='';
			%TAG=();
		}
                if($INCOMMENT && substr($$dataref,$c-2,2) eq '--'){
                        $INCOMMENT=0; 
			$TAG{'='}=substr($$dataref,$commstart+4,
				$c-$commstart-3);
			$ret=&$callbackfunc('!--',\%TAG, $dataref,
				$commstart, $c-$commstart+1);
			if(defined $ret && $ret != 0){
				$c+=$ret;}
			delete $TAG{'='};
			next;
		}
	}

        next if($INCOMMENT);

        if($INTAG){

                $ELEMENT=''; $VALUE='';

		# eat whitespace
		while(substr($$dataref,$c,1)=~/[ \t\r\n]/i){ $c++; }

		$start=$c;
		while(substr($$dataref,$c,1)!~/[ \t\r\n=\>]/i &&
			$c < $LEN) { $c++; }

		$ELEMENT=substr($$dataref,$start,$c-$start);

		$VALUE='';
		if(substr($$dataref,$c,1) ne '>'){
		 # eat whitespace
		 while(substr($$dataref,$c,1)=~/[ \t\r\n]/i) { $c++; }

                 if(substr($$dataref,$c,1) eq '='){ 
                	$c++;
			$start=$c;
			my $p = substr($$dataref,$c,1);
                        if($p eq '"' || $p eq '\''){ 
                        	$c++; $start++;
	                        $c++ while(substr($$dataref,$c,1) ne $p &&
	                        	$c < $LEN);
				$VALUE=substr($$dataref,$start,$c-$start);
                                $c++; 
			} else {
                                $c++ while(substr($$dataref,$c,1)!~/[ \t\r\n\>]/ &&
                                	$c < $LEN);
				$VALUE=substr($$dataref,$start,$c-$start);
			}

			# eat whitespace
                	while(substr($$dataref,$c,1)=~/[ \t\r\n]/) { $c++; }
                 } 
		} # if $c ne '>'
		$c--;
		$TAG{$ELEMENT}=$VALUE; # save element in the hash
	}
}}

################################################################
