#$Id: lclean,v 1.1.1.7 1997/07/01 00:06:43 schwartz Rel $
#
# lclean, Clean Laola style documents
#
# Copyright (C) 1997 Martin Schwartz 
#
# ATTENTION!
# 
# This program modifies some data in your "structured storaged" LAOLA 
# documents. Though I think, the program works correctly, there might be 
# errors or there might be in future unpredictable changes in the document 
# layout. Keep in mind, that this program will change your document files! 
#
# Further more, this program can recognize only this garbage, that is not 
# specific to any special program. E.g., in Word 6 documents there are data 
# parts, that lclean can clean, and there are parts, that only Word could 
# clean. So please:
#
#    1. Keep a backup of all treated documents, until you are sure, 
#       that the cleaned documents are proper. 
#
#    2. Don't rely on, that *all* garbage will be cleaned.
#
# See also usage() of this file. General information at:
#    http://wwwwbs.cs.tu-berlin.de/~schwartz/pmh/index.html
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, you should find it at:
#
#    http://wwwwbs.cs.tu-berlin.de/~schwartz/pmh/COPYING
#
# You can contact me via schwartz@cs.tu-berlin.de
#
#push (@INC, "/usr/lib/mswordview/laola");

useress_global: {
#
# Please uncomment (remove '#') / change settings according to your system
#
   # $sys_os = "unix"; 
   # $sys_os = "dos";  

   $global_targetdir = "doctrash"; # This is the output directory
}

sys_global: {
   $REV = ('$Revision: 1.1.1.7 $' =~ /: ([^ ]*)/) && $1;
   '$Date: 1997/07/01 00:06:43 $' =~ / ..(..)\/(..)\/(..)/;
   $DATE = "$2/$3/$1";
   &mystd('acdefi:lnrsz');
   &usage if !@ARGV || !($opt_r||$opt_c||$opt_s||$opt_i||$opt_l||$opt_e);
   @crc32=();
   &make_crc32_table();
   $hid_buf="";
   $hid_headerpart="";
}

main: {
   local($error); local($dont);
   local($status);
   local($fname);
   local($openmode)=$opt_c||$opt_i;

   foreach $infile (@ARGV) {

      $error=0; $dont=0;

      print "Processing \"$infile\": ";
      next if !&msg (&laola_open_document($infile, $openmode));

      block_a: {
         if ($opt_r) {
            $dont++;
            $error++ if !&msg (&report_trash(-s $infile));
         }
         if ($opt_e) {
            print "Extracting...";
            ($status, $fname) = &extract_hidden();
            $error++ if !&msg ($status, "($fname)");
         }
         if ($opt_l) {
            $dont++;
            $error++ if !&msg (&list_hidden());
         } 
         if ($opt_s||$opt_d) {
            print "Saving... ";
            $error++ if !&msg (&save_all_trash($infile));
         }
      }

      if (!$error && !$dont) {
         if ($opt_c) {
            print "Cleaning...";
            ($status, $size) = &clean_all_trash();
            &msg ($status, "($size bytes)");
         }
         if ($opt_i) {
            print "Inserting... ";
            &msg (&insert_hidden($opt_i));
         }
      }      
      &laola_close_document($infile);
      print "Done.\n\n";
   }
   exit 0;
}


sub usage {
   print "\n";
   print "lclean  Modify trash sections of OLE documents, like Excel or Word.\n";
   print "        Version $REV from $DATE\n";
   print "usage:  lclean -r {document}              Report documents trash\n";
   print "        lclean -c [-n] {document}         Clean document\n";
   print "        lclean -s [-a][-d][-z] {document} Save documents trash sections\n";
   print "        lclean -i <file> [-a] {document}  Insert (hide) a file in OLE documents\n";
   print "        lclean -l {document}              List the hidden file in OLE documents\n";
   print "        lclean -e [-f][-z] {document}     Extract hidden file from OLE documents\n";
   print "-a  All. When saving the trash sections, use one big file.\n";
   print "    When inserting a hidden file, use all trash space (1+2+4+8).\n";
   print "-c  Clean trash sections. Will be done (after -s) and (before -i).\n";
   print "-d  Directory, store trash sections in own directory. ".
         "Eg \"test\" for \"test.doc\".\n";
   print "-e  Extract the hidden file out of OLE documents, if there is one.\n";
   print "-f  Force, always overwrite existing files when extracting.\n";
   print "-i  Insert a <file> into the document. Use only trash space (1+2).\n";
   print "-l  List the hidden file in OLE documents, if there is one.\n";
   print "-n  Null, use null bytes instead of random bytes for cleaning trash type 4.\n";
   print "-r  Report, gives a little report about the trash sections.\n";
   print "-s  Save all trash sections into directory \"$global_targetdir\".\n";
   print "-z  Zero, don't create zero length files.\n";
   print "\n";
   print "ATTENTION: when cleaning a document or when inserting a file into, ".
         "keep a\n".
         "           backup until you are sure, that the modified file will be ok!\n";
   print "\n";
   exit 0;
}


#
# ------------------- Report about trash sections ---------------------
#

sub report_trash {
    local($fsize)=shift;
    local($i);
    local(@l)=0;
    local($lsum)=0;
    local(@type) = ("Big blocks", "Small blocks", "File space", "System space");

    print "\n";
    print "Trash (and system) report:\n";
    for ($i=0; $i<=3; $i++) {
       $l[$i] = &laola_get_trashsize(2**$i);
       printf ("   Type %d %15s %5d bytes\n", 
               2**$i, "(".$type[$i]."):", $l[$i]);
       $lsum+=$l[$i];
    }
    printf ("                 1+2:     %5d of %d bytes (%.1f %%)\n",
            $l[0]+$l[1], $fsize, 100*($l[0]+$l[1])/$fsize);
    printf ("                 1+2+4+8: %5d of %d bytes (%.1f %%)\n", 
            $lsum, $fsize, 100*($lsum/$fsize));
    "ok";
}

#
# ------------------ Save document trash into files -------------------
#

sub save_all_trash {
#
# Copies trash types to each a file as: 
# targetdir/basename.xx, where xx is the hex number the properties handle
#
   local($infile)=shift;
   local($outpath)=&get_outpath($infile);
   return "Error while saving!\n" if !$outpath;
   local($status, $i);
   local($buf);

   # save block trash
   if (!$opt_a) {
      for ($i=1; $i<=4; $i++) {
         $buf = "";
         return $status if ($status=&laola_get_trash(1<<($i-1), $buf)) ne "ok";
         return $status if ($status=&save_file($outpath.".tr$i", $buf)) ne "ok";
      }
   } else {
      $buf = "";
      return $status if ($status=&laola_get_trash(0, $buf)) ne "ok";
      return $status if ($status=&save_file($outpath.".tra", $buf)) ne "ok";
   }

   $status;
}

sub get_outpath {
#
# Get the path of the directory, where the trash sections shall be 
# stored. By default this is directory $global_targetdir. If command 
# line option "-d" is set, the directory is depending on the filename.
#
   local($basename) = &basename(shift);
   $global_targetdir = $basename if $opt_d;
   if (&targetdir($global_targetdir)) {
      return $global_targetdir."/".$basename;
   } else {
      return "";
   }
}


#
# --------------------- Hidden file management -------------------------
#

#
# The "hidden" file is tried to be stored in trash type 1|2, that means in 
# the unused blocks. With option -a tries additionally type 8 and 4.
#
# Hidden file header:
#    0     1 byte   Used trash types
#    1     1 byte   magic = 0x38
#    2     4 bytes  mode of file (access permissions)
#    6     4 bytes  modification time of file
#   0a     4 bytes  size of file
#   0e     4 bytes  crc32 of file
#   12     2 bytes  = $fl == length of filename
#   14     $fl      filename
#   14+$fl filesize file
#

sub insert_hidden {
#
# "ok"||$error = insert_hidden($filepath)
#
   local($filepath)=shift;
   local($status,$fname) = &readable_file($filepath);
   return $status if $status ne "ok";

   local($ttype, $tsize);
   local ($needed) = 0x14 + length($fname) + (-s $filepath);

   $ttype=1|2; 
   $tsize=&laola_get_trashsize($ttype);
   if ($opt_a) {
      if ($tsize < $needed) {
         $ttype = 1|2|8; $tsize=&laola_get_trashsize($ttype);
      }
      if ($tsize < $needed) {
         $ttype = 1|2|4|8; $tsize=&laola_get_trashsize($ttype);
      }
   }
   if ($tsize < $needed) {
      return "Not enough trash space to insert \"$fname\"...";
   }

   $status = &read_hidefile($filepath, $fname, $ttype);
   return $status if $status ne "ok";

   $status = &laola_modify_trash(
      $ttype, pack("C", $ttype).$hid_headerpart.$hid_buf, 0, $needed);
   return "Error while inserting \"$filepath\" into document!" 
      if $status ne "ok";

   "ok";
}

sub read_hidefile {
#
# "ok"||error = read_hidefile ($filepath, $filename, $ttype)
#
# This works with global variables... The idea is, that the file to 
# hide will be read only once, if more than one document shall be 
# treated. Especially the crc32 has to be calculated then only once.
#
   return "ok" if $hid_headerpart;

   local($filepath, $fname, $ttype)=@_;
   local($crc);
   local($mode, $mtime) = (stat($filepath))[2,9];
   local($status)="ok";

   if (open(IO, $filepath)) {
      binmode(IO);
      if (read(IO, $hid_buf, -s $filepath) == -s $filepath) {
         $crc = &get_crc32($hid_buf);
         $hid_headerpart = pack("CVVVVv", 
            0x38, $mode, 
            $mtime, -s $filepath, $crc, 
            length($fname));
         $hid_headerpart .= $fname;
      } else {
         $status="Error while reading \"$filepath\"!";
      }
      close(IO);
   }
   $status;
}

sub list_hidden {
#
# void = list_hidden()
#
   local($ttype, @header)=&get_hidden();
   if (!$ttype) {
      print "\nHidden file report:\n".
            "   No hidden file stored.\n";
      return "ok";
   }
   local($fname)="";
   local($status)="";

   local($ttype, $magic, $mode, $mtime, $fsize, $crc, $fnsize)=@header;
   $status = &laola_get_trash($ttype, $fname, 0x14, $fnsize);
   return "Input error while retrieving information!" if $status ne "ok";

   local(@tim) = localtime($mtime);
   printf ("\nHidden file report:\n".
           "   Trash type is %d\n".
           "   %5d %02d/%02d/%02d %02d:%02d %s\n",
           $ttype,
           $fsize, 
           $tim[4]+1, $tim[3], $tim[5],
           $tim[2], $tim[1],
           $fname);
   "ok";
}

sub extract_hidden {
#
# ("ok"||error, $filename) = extract_hidden()
#
# Saves the file from a Laola document to normal file system.
# - The Laola document will not be modified. 
#
   local($ttype, @header)=&get_hidden();
   return ("ok", "nothing to do") if !$ttype;

   local($fname)="";
   local($file)="";
   local($status)="";

   local($ttype, $magic, $mode, $mtime, $fsize, $crc, $fnsize)=@header;

   $status = &laola_get_trash($ttype, $fname, 0x14, $fnsize);
   return "Input error while retrieving information!" if $status ne "ok";

   $status = &laola_get_trash($ttype, $file, 0x14+$fnsize, $fsize);
   return "Error while reading data!" if $status ne "ok";
   return "File is corrupt!" if &get_crc32($file) != $crc;

   return "Cannot save to file \"$fname\"!" if !&replaceable_file($fname);
   $status = &check_replace_file($fname);
   return ("ok", "") if !$status;

   $status = &save_file($fname, $file);
   return "Error while saving file \"$fname\"!" if $status ne "ok";

   chmod ($mode, $fname);
   utime (time, $mtime, $fname);

   return ("ok", $fname);
}


sub try_hidden {
#
# @header||() = &try_hidden($ttype);
#
   local($ttype)=shift;
   local($header)="";
   local(@header);
   local(@empty)=();
   check: {
      last if &laola_get_trash($ttype, $header, 0, 0x14) ne "ok";
      @header = unpack ("CCVVVVv", $header);
      last if $header[0] != $ttype;
      last if $header[1] != 0x38;
      last if $header[4] > &laola_get_trashsize($ttype);
      return @header;
   }
   return @empty;
}

sub get_hidden {
#
# ($ttype||0, @header) = &get_hidden();
#
   local($ttype)=0;
   local(@header)=();

   if (@header = &try_hidden(1|2)) {
      $ttype = 1|2;
   } elsif (@header = &try_hidden(1|2|8)) {
      $ttype = 1|2|8;
   } elsif (@header = &try_hidden(1|2|4|8)) {
      $ttype = 1|2|4|8;
   }
   return ($ttype, @header);
}


#
# ------------------------- Clean document -----------------------------
#

sub clean_all_trash {
#
# ($status, $alltrashsize) = clean_all_trash();
#
# Cleans trash types with different values
#
   local($bufsize, $status);
   local($alltrashsize)=0;

   clean: {
      # Free blocks. I prefer the byte "\00"
      ($status, $bufsize) = &clean_trash(1|2, "\00");
      last if $status ne "ok";
      $alltrashsize += $bufsize;

      # Clean file trash, I prefer random strings. 
      # ("random" is a special word for &clean_trash)
      ($status, $bufsize) = &clean_trash(4, "random");
      last if $status ne "ok";
      $alltrashsize += $bufsize;

      # Unused system bytes, value 0xff looks nice
      ($status, $bufsize) = &clean_trash(8, "\xff");
      last if $status ne "ok";
      $alltrashsize += $bufsize;
   }

   return ($status, $alltrashsize);
}

sub clean_trash {
#
# ("ok"||error, $bufsize) = clean_trash($type, $fill);
#
# The trash of type $type will be filled with the pattern of string $fill.
# If the $fill is "random", the trash will be filled with random bytes.
#
   local($type, $fill)=@_;
   local($buf, $bufsize, $status);
   $buf="";
   $bufsize = &laola_get_trashsize($type);

   if ($fill =~ /^random$/i) {
      if (!$opt_n) {
         &new_random_seed();
         $buf=&get_random_shuffle_string(&get_random_shuffle_sizes($bufsize));
      } else {
         $buf="\00" x $bufsize;
      }
   } else {
      $buf = $fill x ( int($bufsize/length($fill)) );
   }
   $buf .= substr($fill, 0, $bufsize % length($fill));

   $status = &laola_modify_trash($type, $buf, 0, $bufsize);
   return ($status, $bufsize);
}

#
# ------------------- (More or) Less Random Support ---------------------
#

sub new_random_seed {
#
# Random seed. *Not* secure!
#
   srand(time^$$);
}

sub get_random_shuffle_sizes {
#
# @size = &get_random_shuffle_sizes($size);
#
# Returns a list of randomly yielded sizes, that together have a 
# length of $size.
#
   local($maxsize)=shift;
   local(@size, $size, $chunk);

   local($chunk_avgnum) = 5 + int(rand(10)) + int(rand($maxsize/0x2000));
   local($avgsize) = int($maxsize/$chunk_avgnum);

   @size=(); $size=0;
   while ($size < $maxsize) {
      $chunk = int( rand($avgsize*2) );
      $chunk = ($maxsize - $size) if (($size+$chunk) > $maxsize);
      $size += $chunk;
      push (@size, $chunk);
   }
   return @size;
}

sub get_random_shuffle_string {
#
# $buf = &get_random_shuffle_string(@sizes);
#
# Return one string consisting out of $#sizes substrings, each 
# consisting out of $sizes[$i] random bytes.
#
   local($buf)="";
   foreach $size (@_) {
      $buf .= &get_random_string($size);
   }
   &random_shuffle_string($buf);
}

sub get_random_string {
#
# $buf = &get_random_string($size)
#
# Return one string consisting out $size random bytes.
#
   local($len)=shift;
   local($buf)=""; 
   local($i); 
   for ($i=0; $i<$len; $i++) {
      $buf .= pack("C", rand(256));
   }
   $buf;
}

sub random_shuffle_string {
#
# $shuffled_buf = &random_shuffle_string($buf);
# 
# Permutates the elements of $buf randomly.
#
   local($len)=length($_[0]);
   local($sbuf)=""; local(%sbuf)=();

   # Get a $len elements sized hash 
   # (hash == perl slang for associative array)
   # Keys are random integer numbers, values are elements of $buf.
   # Took integers, because sort is to slow for big lists of real.
   # 
   local($key); local($i)=0;
   while ($i < $len) {
      $key=int(rand($len*191));
      if (!$sbuf{$key}) {
         $sbuf{$key} = substr($_[0], $i++, 1);
      }
   }
   # Sort the hash array according to it's keys, create the buffer.
   for (sort {$a <=> $b} keys %sbuf) {
      $sbuf .= $sbuf{$_}; 
   }

   $sbuf;
}

#
# -------------------------- File treatment ----------------------------
#

sub targetdir {
#
# If none exists, create a target directory. This will be readable 
# only to the person owning the directory.
#
   local($dir)=shift;
   return 1 if -d $dir;

   local($oldumask, $status);

   if ($sys_os eq "unix") {
      $oldumask = umask; umask 0;
         $status = mkdir ($dir, 0700);
      umask $oldumask;
   } elsif ($sys_os eq "dos") {
      $status = mkdir ($dir, 0700);
   }
   if ($status) {
      print "(created directory \"$global_targetdir\") ";
   } else {
      print "Cannot create directory \"$global_targetdir\"!\n";
   }
   $status;
}

sub save_file {
#
# "ok"||$error = &save_file($path, $buf);
#
   if ($opt_z) {
      return "ok" if !$_[1];
   }
   if (! (open(OUT, '>'.$_[0]) && binmode(OUT)) ) {
      return "Cannot save to file \"$_[0]\"!";
   }
   print OUT $_[1];
   close(OUT); 
   "ok";
}

sub replaceable_file {
#
# 1||0 = check_replaceable_file($filename)
#
   local($file)=shift;
   if (-e $file) {
      return 0 if !-f $file;
      return 0 if !-w $file;
   }
   1;
}

sub check_replace_file {
#
# 1||0 = check_replace_file($filename)
#
   local($file)=shift;
   local($key)="";

   return 1 if $opt_f;
   return 1 if !-e $file;
   return 0 if !-f $file;
   return 0 if !-w $file;
   print "\nFile \"$file\" exists. Replace it? (y/n) ";
   while (1) {
      last if ($key=getc) =~ /[yn]/;
      print "(y/n) " if $key eq "\x0a";
   }
   getc; # get \x0a from userEss input
   $key =~ /y/;
}

sub readable_file {
#
# ("ok"||error, $filename) = &readable_file($filepath);
#
   local($filepath)=shift;
   return "No file given!"                if !$filepath;
   return "\"$filepath\" does not exist!" if !-e $filepath;
   return "\"$filepath\" is no file!"     if !-f $filepath;
   return "Cannot read \"$filepath\"!"    if !-r $filepath;
   local($fname)=substr($filepath, rindex($filepath,'/')+1);
   return ("ok", $fname);
}

#
# ------------------------------- CRC ---------------------------------
#

sub make_crc32_table {
#
# void = make_crc32_table()
#
   return if @crc32;
   local($crc, $i);
   for ($i=0; $i<=0xff; $i++) {
      $crc = $i;
      for (1 .. 8) { 
         if ($crc & 1) {
            $crc = $crc/2 ^ 0xedb88320; 
         } else {
            $crc /= 2;
         }
      }
      push (@crc32, $crc);
   }
}

sub get_crc32 {
#
# $crc = &crc32 (extern $buf)
#
# Computes a 32bit CRC for the specified buf. This might take some time!
#
   local($crc)=0xffffffff;
   local(@buf) = unpack(sprintf("C%d", length($_[0])), $_[0]);
   while(@buf) {
      $crc=$crc32[$crc&0xff ^ shift(@buf)] ^($crc/0x100);
   }
   $crc;
}

#
# ------------------------------ utils --------------------------------
#

sub msg {
   local($status) = shift;
   if ($status eq "ok") {
      if (defined $_[0]) {
         print "$_[0] ";
      }
      return 1;
   } else {
      if ($status) {
         print " error!\nError: $status\n";
      } else {
         print " error!\n";
      }
      print "\n" if ! ($status =~ /\n$/);
      return 0;
   }
}

sub basename {
#
# $basename = basename($filepath)
#
   (substr($_[0], rindex($_[0],'/')+1) =~ /(^[^.]*)/) && $1;
}

sub mystd {
   local($opts)=shift;
   $|=1; $[=0;
   if (!$sys_os) {
      # If sys_os is not set explicitly: 
      #    assume a dos system, if some standard /etc/file not present.
      $sys_os = "dos";
      $sys_os = "unix" if 
         (-e '/etc/group') || (-e '/etc/hosts.equiv') || (-e '/etc/passwd');
   }
   if ($sys_os eq "unix") {
      splice(@INC, 0, 0, 
             ($ENV{'HOME'}||$ENV{'LOGDIR'}||(getpwuid($<))[7]).'/lib/perl/');
   }
   require "laola.pl";
   require "getopts.pl";
   &Getopts ($opts.'o:'); 
   if ($opt_o) {
      if (!open (STDOUT, '>'.$opt_o)) {
         print "Error! Cannot redirect output to \"$opt_o\"!\n\n";
         exit 1;
      }
   }
}

