#!/usr/bin/perl
#
# extract URLs from TOC of a debiandoc-sgml document for use by
# summary_html

# Copyright (C) 1998 by Christian Schwarz 
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.

# Modified to handle new chapter title layout, 981014, Richard Braakman


use strict;
use warnings;

# Usage: <this script> DOCID URL DIR
#
#   DOCID  identifier used as the prefix of every reference printed
#   URL    base URL under which the HTML files in DIR are published
#   DIR    directory holding the *.html files to scan
#
# Prints one "<reference> <url>" pair per line on stdout:
#
#   DOCID            URL/index.html          (always, first line)
#   DOCID-<chapter>  URL/<file>              (for each <h1> chapter heading)
#   DOCID-<section>  URL/<file>#<anchor>     (for each <hN><A name="..."> heading)
#
# Dies if fewer than three arguments are given, if an input file cannot be
# opened, or if an <h1> heading is not followed by a "chapter <number>" line.

@ARGV >= 3
  or die "usage: $0 <docid> <url> <dir>\n";

my $docid = shift;
my $url   = shift;
my $dir   = shift;

print "$docid $url/index.html\n";

for my $src (glob "$dir/*.html") {
  # keep only the last path component; this pattern cannot fail to match
  my ($file) = $src =~ m{([^/]*)\z};

  # skip special files
  next if $file eq 'index.html' or $file eq 'footnotes.html';

  # three-argument open so that odd characters in the path are never
  # interpreted as an open mode
  open(my $in, '<', $src)
    or die "cannot open input file $src for reading: $!";

  while (defined(my $line = <$in>)) {
    # chomp, not chop: a final line without a newline must stay intact
    chomp $line;

    # new chapter?
    if ($line =~ /^<h1>/) {
      my $chap;

      # Look at the lines following the <h1> line until the chapter number
      # or the closing tag shows up.  Stopping at end of file as well keeps
      # a truncated document from looping forever.
      while (defined(my $next = <$in>)) {
        if ($next =~ /chapter (\d+)/) {
          $chap = $1;
          last;
        }
        last if $next =~ m{</h1>};
      }

      # defined(), so that a chapter numbered 0 is accepted
      defined $chap or die "$file:$.: cannot find chapter title";

      print "$docid-$chap $url/$file\n";
    }
    # new section?
    elsif ($line =~ /^<h\d><A name="([^"]+)">\s*$/) {
      my $anchor = $1;

      # the first word of the next line is taken as the section number
      my $next = <$in>;
      my ($sec) = defined $next ? ($next =~ /^(\S+)/) : ();

      # Without this check a failed match would silently reuse the stale
      # capture (the anchor) as the section number.
      unless (defined $sec) {
        warn "$file:$.: cannot find section number for anchor $anchor\n";
        next;
      }

      print "$docid-$sec $url/$file\#$anchor\n";
    }
  }
  close($in);
}

exit 0;
