#!/usr/bin/perl -w

### Should check/update for XML files as per
###  https://isc.sans.edu/forums/diary/XML+A+New+Vector+For+An+Old+Trick/19423/

# Search (mail spool) files for things that could contain virus.
# We only report things: no mail files are changed.

# This script is not suitable for scanning executables; you could try
#	perl -pe 's/\x00//g' FILE.EXE | strings -a | checkvirus
# for those (but even that is not recommended).

# BEWARE: we specifically ALLOW .doc and similar files and matching
#         Content-Types to cut down on the noise...

# Use something like
#
#	checkvirus /var/spool/mail/*
#
# or maybe via the following script (checkmailspool):
#
## #!/bin/ksh -
## 
## # Search (mail spool) files for things that could contain virus;
## # mail to psz the new things we find.
## #
## # Run from crontab with line:
## #  55 4,13,18 * * * /usr/local/sbin/checkmailspool
## 
## PATH=/sbin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin:/usr/users/system/root/bin
## export PATH
## 
## CMD="${0##*/}"
## 
## if [ $# = 0 ]; then
##   CMP=true
##   TMP=/users/system/root/tmp/$CMD
##   # print "Searching files in /usr/spool/mail ..."
##   # print
##   cd /usr/spool/mail
##   set -- *
##   if [ -f $TMP.out ]; then
##     mv $TMP.out $TMP.out.old
##   else
##     touch $TMP.out.old
##   fi
## else
##   CMP=false
##   TMP=/users/system/root/tmp/$CMD-single
## fi
## 
## 
## /usr/local/bin/checkvirus "$@" > $TMP.out 2> $TMP.new
## 
## 
## if [ "$CMP" = true ]; then
##   diff $TMP.out.old $TMP.out > $TMP.diff
##   grep '^>' $TMP.diff | cat -v >> $TMP.new
## 
##   if [ -s $TMP.new ]; then
##     {
##       print
##       print "Above are additions since last check by $CMD, full results in:"
##       ls -l $TMP.out.old $TMP.out
##     } >> $TMP.new
##     mailx -s 'Viruses in mail files' psz < $TMP.new
##   fi
## else
##   cat $TMP.out
## fi

# What about a man page?

# Comments on design:
#
# We expect to "work" on mail files, i.e. human-readable text.
#
# We look at the file(s) line-by-line, without any "context" kept from line
# to line. Thus miss things when spread over several lines. (Should we
# attempt to keep $prevline, and re-check "$prevline$thisline"?). We do not
# attempt to check that content-type matches file extension; do nothing about
# encodings (e.g. compressed files) that would need knowledge of a block;
# miss mis-matched filename and name in Bagle variants. We thus also miss
# attacks like "missing filename" http://www.securityfocus.com/bid/13837.
# 
# Known bugs/problems:
# 
#   A CR at end-of-line (or a FF at the beginning) could interfere with our
#   detections; might remove them, but have not seen them "in the wild".
# 
#   Do not handle continuation of header lines: e.g. Subject lines could be
#   spread over several lines (and cause buffer overflow) but we do not notice.
#
# We keep just enough context to recognize mail message headers (structured
# as mail files), to aid in reporting. We also make a feeble attempt to
# recognize Word document attachments, to cut down on the noise.


use bytes;	# See man pages perlunicode, utf8, bytes


$debug = ($0 =~ m/debug/);
print "Debug mode: extremely verbose...\n\n" if $debug;

print "BEWARE: we allow DOC RTF XLS files (and matching Content-Types) ...\n";
print "Watch out for\n";
print "* EXE BAT COM and similar files, nasty JavaScript,\n";
print "  funny Content-Types, some HTML tags :\n\n";


sub base64 {
	# Decodes argument
	my ($b) = @_;
	my ($u) = '';
	my ($i, $l);

	$b =~ tr|A-Za-z0-9+/||cd;	# remove non-base64 chars (including padding)
	$b =~ tr|A-Za-z0-9+/| -_|;	# convert to uuencoded format

	$l = length($b);
	$b =~ s/.$//, $l-- if ($l % 4) == 1;

	# Break into lines of 60 encoded chars, prepending "M" or whatever.
	# For speed, keep all uuencoded strings and unpack in one go.
	$l -= 60;
	for ($i = 0; $i <= $l; $i += 60) {
	    $u .= "M" . substr($b, $i, 60);
	}
	$b = substr($b, $i) if $i;
	# and any leftover chars
	if ($b ne "") {
	    $u .= chr(32 + length($b)*3/4) . $b;
	}
	return unpack ("u", $u);
}


$| = 1;


$prevARGV = "";
$prevtitl = "";
$wordfile = 0;
$from = "";
$from2 = "";
$to = "";
$date = "";
$subj = "";
$head = 0;
$spam = 0;
$usydmail = 0;

while (<>) {
    if ($debug) { print "Read $_"; print "\n" unless m/\n$/ }

    $prevARGV = "", $head = 0, next if $_ eq "\n";
    if ($prevARGV ne $ARGV and m/^From (.*) \w{3} (\w{3} [ \d]?\d \d\d:\d\d):\d\d \d\d\d\d\n$/) {
      $from = $1;
      $from2 = "";
      $to = "";
      $date = $2;
      $subj = "";
      $head = 1;
      $usydmail = 1;
    }
    if ($head) {
      if (m/^From: (.*)\n/i) {
	$x = $1;
	$from2 .= " ($x)" unless
	  $from and
	  ( $x eq $from or
	    $x =~ m/\<\Q$from\E\>/ or
	    $x =~ m/^\Q$from\E \(/
	  );
      }
      if (m/^(?:To|X-smtpdoor-to|Orig-To): (.*)\n/i) {
	$x = $1;
	$to .= " (to $x)" unless
	  ( $x eq $ARGV or
	    $x =~ m/^\Q$ARGV\E\@/ or
	    $x =~ m/\<\Q$ARGV\E\@/ or
	    $to =~ m/ \(to \Q$x\E\)/
	  );
      }
      if ((! $subj) and m/^Subject: (.*)\n/i) {
	$subj = $1;
      }
      if (m/^Received: from /) {
	# Want to flag internal (USyd-originated) mail
	$x = $_;
	$x =~ s/ by .*//;
	if ($x =~ m/[^\w\.](\d+\.\d+\.\d+\.\d+)([^\w\.]|$)/) {
	  $x = $1;
	  $usydmail = 0 unless $x =~ /^(129\.78|10\.|172\.|127\.0\.)/ and $x !~/^172.(1[3-9][0-9])/;
	}
      }

      # Skip some spam filter lines
      if (m/^(X-spam-sign|X-(x-)*smtpdoor-spamsign|X-(x-)*PerlMx-Spam): /) {
	$spam = 1;
      }
      elsif ($spam and m/^[\t ]/) {
	next;
      }
      else {
	$spam = 0;
      }
    }
    $prevARGV = $ARGV;

    # Careful: do not be recursive.
    # I wonder how to check sender/recipient when Jim will use this
    # within smtpdoor...
    next if m/^> [ \*-] \w+(-\w+)?:/ and
      $ARGV eq 'psz' and
      $usydmail and
      (
        $from eq 'root@maths.usyd.edu.au' or
        ( $from eq 'daemon' and
          $from2 eq ' (root@maths.usyd.edu.au (system PRIVILEGED account))' )
      ) and
      $subj =~ m/^Viruses in \w+ mail files$/;
    next if m/\*\*NASTY\*\*/ and
      ( $ARGV eq 'psz' ) and
      $usydmail and
      $from eq 'smap@maths.usyd.edu.au' and
      $from2 eq '';
    next if m/^p\w+:(App|Sys)[lL]og(<FONT[^>]+>)?(<B>)?(<BLINK>)?:((Warning|Error):|:UPHClean - )/ and
      ( $ARGV eq 'mike' or $ARGV eq 'robertp' ) and
      $usydmail and
      $from eq 'mike@maths.usyd.edu.au' and
      $from2 eq '' and
      $subj =~ m/^(Lab \d+(\/\d+)?|Admin( PC)?|Staff( PC)?) night jobs notification$/;

    $p = "";
    $c = "";

    MATCH: {
      $p = "*Unprintable char", last MATCH if
# But some (most) may be dangerous, see
#  http://www.securityfocus.com/archive/1/442904
#  http://applesoup.googlepages.com/bypass_filter.txt
	m/[^\t\n\f\r\e -~\x81-\x9a\xa0-\xfe]/ and ! m/^\x04$/;
      $p = "*Mid-line break", last MATCH if
	m/[\n\x8a\x8d]./ or (m/\r.*[^\s]/ and !m/^[^\r]*\r=$/) or m/.\f/;
      $p = "*Escape sequence", last MATCH if
	m/\e/ and (m/\e([^\$\(]|$)/ or m/\e\$([^B]|$)/ or m/\e\(([^BJ]|$)/);	# Allow Asian languages

      $p = "*Long header", last MATCH if
	$head and m/.{200}/ and ! m/^((Received|References|DKIM-Signature|DomainKey-Signature|X-\w[\w-]*\w): |[\t ]id \d+ for )/;
      $p = "*Long line", last MATCH if
	m/.{999}/;


      # Exception: nothing wrong with it "as is",
      # but might complain after uXX and utf7 decoding
      next if
	m/^<a href=3D"mailto:pmx-auto-approve%2b[a-z0-9]+\@sup=$/ or
  	m/^<tr><td> <a href=3D"mailto:pmx-auto-approve%2b[a-z0-9]+=$/;

      if ( m/</ ) {
	@x = ( $_, split '<' );
      }
      else {
	@x = ( $_ );
      }
      foreach $x ( @x ) {
	if ($debug) { print "Test $x"; print "\n" unless $x =~ m/\n$/ }
	if ( ( ($t,$n) = $x =~ m/(?:^|[^\w?\$])(
		applet.*?code |
		applet.*?object |
		archive |		# applet
		background |
		bgsound |
		boundary |
		classid |		# object
		codebase |		# applet, object
		dynsrc |
		embed\s(?:.*?\s)?src |
		filename |
		form\s(?:.*?\s)?action |
		href |
		i?frame\s(?:.*?\s)?src |
		i?layer\s(?:.*?\s)?src |
		img\s(?:.*?\s)?lowsrc |
		img\s(?:.*?\s)?src |
		input\s(?:.*?\s)?src |
		link\s(?:.*?\s)?src |
		longdesc |		# frame, img
		name |
		meta\s(?:.*?\s)?url |
		object.*?data |
		pluginspage|pluginurl |	# embed
		script\s(?:.*?\s)?src |
		usemap |		# img, input, object
		onload |		# actions for various
		onunload |
		onerror |
		onclick |
		ondblclick #|
		onkey(?:down|press|up) |
		onmouse(?:down|enter|leave|move|out|over|up|wheel)
		)\s*=(?:3d)?\s*(.*)/ix ) or
	     ( ($t,$n) = $x =~ m/(?:^|[^\w?\$])\w+(:src)\s*=(?:3d)?\s*(.*)/i ) or
	     ( ($t,$n) = $x =~ m/^\s*(begin)\s+\d*\s+(.*)$/i ) ) {

	  # Exceptions
	  next if
	    m/^ *name=(3D)?\"\w+-?\w+\"\/>$/ or
	    m/^ *name=(3D)?\w+-?\w+>\s*$/ or
	    ( $t eq 'href' && $n =~ m/^"http:\/\/([a-z][a-z]\.rd|rd|pa)\.yahoo\.com\/(.*\/)?\*/ ) or
	    ( $t eq 'href' && $n =~ m/^"http:\/\/chkpt\.zdnet\.com\/chkpt\/astr\w+\/$/ ) or
	    ( $t eq 'HREF' && $n =~ m/^"http:\/\/weeklynews\.lastminute\.com\// and $n =~ m/\/Key/ ) or
	    ( $t eq 'href' && $n =~ m/^"http:\/\/www\.sulekha\.com\// and $n =~ m/.asp\?cid/ ) or
	    ( $t eq 'href' && $n =~ m/^"http:\/\/cgi\w*\.ebay\.com(\.au)?\/\w+-?\w+\/eBayISAPI.dll\?[^"]*(">|=$)/ ) or
	    ( $t eq 'href' && $n =~ m/^"mailto:info\@asia\.cnet\.com ">/ ) or
	    ( $t eq 'href' && $n =~ m/^"https?:\/\/www\.vintagecellars\.com\.au\/scripts\/(xworks\.exe|wc\.dll)\?/ ) or
	    ( $t eq 'href' && $n =~ m/^"http:\/\/www\.ht\.com\.au\/scripts\/xworks\.exe\?/ ) or
	    ( ($t eq 'href' or $t eq 'HREF' or $t eq 'img src') && $n =~ m/^"http:\/\/\w+\.(f2|fairfax)\.com\.au\/event\.ng\/Type=/ ) or
	    ( $t eq 'href' && $n =~ m/^"http:\/\/www\.sswug\.org\/sitereg\.reg">/ );
	  next MATCH if
	    ( $t eq 'href' && $n =~ m/^"mailto:pmx-auto-approve%\w+\@suphys.physics.usyd.edu.au\?subject=Release%20message%20from%20quarantine/ );
	  $q = ''; $x1 = '';
	  if ($t !~ m/^begin/i) {
	    if ($n =~ s/^\"//) {
	      $q = '"';
	      $q = '""', $x1 = $1 if $n =~ s/\"(.*)//;
	    }
	    elsif ($n =~ s/^\'//) {
	      $q = "'";
	      $q = "''", $x1 = $1 if $n =~ s/\'(.*)//;
	    }
	    else {
	      $n =~ s/([^\s])\s.*/$1/ unless m/^\s*(file)?name\s*=/;
	      $n =~ s/\>.*// if m/</;
	    }
	  }
	  $p = "*Bad quoting", last MATCH if
	    $x1 and $x1 !~ m/^([\s\>;,]|=20|=$)/ and
	    ! ($x1 =~ m/^-->/ and $x =~ m/(^|<)!--/);
	  $p = "*Un-terminated quote", last MATCH if
	    $q =~ m/^.$/ and
	    ! ( $c =~ m/ --b64 / or
		( $x =~ m/=$/i && ( $x =~ m/=3d/i or $c =~ m/ --uXX / ) ) or
		( m/a href="http:\/\/newsletters.fairfax.com.au\// && m/.{900}/ && m/!$/ ) );

	  $p = "*IE URL obfuscation", last MATCH if
	    $n =~ m/(%[01][0-9a-f]|\&#x0*[01][0-9a-f][^0-9a-f]|\&#0*[12]?[0-9][^0-9]).*(\@|=40|%40|\&#x0*40([^0-9a-f]|$)|\&#0*64([^0-9]|$))/i or
	    $n =~ m/=[01][0-9a-f]([^0-9].*)?(\@|=40|%40|\&#x0*40([^0-9a-f]|$)|\&#0*64([^0-9]|$))/i or
	    $n =~ m/\?\@/;
	  $p = "*IE download alert bypass", last MATCH if
	    $n =~ m/\.\s*(\w{1,6})\s*\?\./i or
	    $n =~ m/\.\s*(\w{1,6})\s*\?(%[01][0-9a-f]|\&#x0*[01][0-9a-f][^0-9a-f]|\&#0*[12]?[0-9][^0-9]|=[01][0-9a-f])/i;

	  if ($q and $t =~ m/^(href|img)/i and ( $n =~ m/^https?:/i or $n =~ m/^[^:]*\?/ ) ) {
	    $n =~ s/\?.*//;
	  }

	  # Long names may cause buffer overflow
	  $p = "*Long name", last MATCH if
	    $n =~ m/.{100}/ and
	    ( $t !~ m/^(href|img)/i or $n =~ m/.{200}/ );
	  next if $t =~ m/^boundary/i;	# Only checked Eudora buffer overflow

	  # Ignore common "nasties"
	  $n =~ s/\[\d\]// if $t =~ m/^(name|filename)/i and $q eq '""' and $n =~ m/^\w+\[\d\]\.\w+$/;
	  $n =~ s/ $// if $t =~ m/^(href|img)/i and $q eq '""' and $n =~ m/^[^ ]* $/;

	  # Outlook may simply remove (skip) illegal characters in filenames e.g. virus.%vb*s
	  # Eudora (for uuencoded blocks) skips all control characters (except CR or NL)
	  # and any of "*/:<>?\| .
	  $p = "*Bad name", last MATCH if
	    $t =~ m/^(begin|name|filename|embed)/i and
	    ! ( $t =~ m/^name/i and ( $x =~ m/(^|<)(input|select|textarea) /i or $x =~ m/(^|<)a name=/i ) ) and
	    (
	      ( $q eq '""' and $n =~ m/[^\w\.\s\&=\(\)\'-]/i and $x !~ m/(^|<)a name=(3d)?\"#\w+\"(>|$)/i ) or
	      ( $q ne '""' and $n =~ m/[^\w\.\s\&=-]/i ) or
	      ( ! $q and $n =~ m/[^\w\.-]/i )
	    );

	  $p = "*IE long share name buffer overflow", last MATCH if
	    $t !~ m/^(name|filename)/i and $n =~ m/^(file:)?[\\\/]*[0-9a-fx\.]+[\\\/]*$/i;
	  $p = "*Remote DoS IE Memory Access Violation", last MATCH if
	    $t !~ m/^(name|filename)/i and $n =~ m/^(file:)?[\\\/]*[^a-z][:|]/i;

	  $p = "*Viral web server", last MATCH if
	    $n =~ m/^http:\/\/\d+\.\d+\.\d+\.\d+:(\d+)\// and $1 > 82 and $1 < 8000;

	  # mailto URLs in Outlook in pre-SP3 OfficeXP (==Office2002)
	  $p = "*Embedded quote", last MATCH if
	    $n =~ m/"|=22|%22|\&#x0*22([^0-9a-f]|$)|\&#0*34([^0-9]|$)|\&quot;/i;

	  # Weird "URL protocol handlers" (about: or javascript:)
	  $p = "*Bad URL protocol", last MATCH if
	    $n =~ m/^(\w+|\w+-\w+):/ and
	    ( $x1 = $1, 1 ) and
	    $x1 !~ m/^(http|https|mailto|Mailto|ftp)$/ and
	    ! ( $t =~ m/^img/i and $x1 =~ m/^cid$/ ) and
	    ! ( $t =~ m/^(href|:src|background)/ and $q eq '""' and $n =~ m/^cid:\w[\w.-]+\w\@\w[\w.]+\w$/i );

	  # Any extension allowed, e.g. nul.txt is same NUL device.
	  # References:
	  # http://www.securityfocus.com/archive/1/193189
	  # http://www.securityfocus.com/archive/1/193306
	  # http://www.securityfocus.com/archive/1/195054
	  # http://support.microsoft.com/kb/256015
	  $p = "*DOS device name", last MATCH if
	    $n =~ m/(^|[:\/\\])(\$MMXXXX0|AUX|CAS\d*|CLOCK\$|COM\d|CON|CONFIG\$|DBLBUFF\$|DBLSBIN\$|EMMXXXX0|HLP\$|IFS\$|IFS\$HLP\$|IPC\$|LPT\d|MAILSLOT|MS\$MOUSE|MSCD\d+|NUL|PIPE|PRN|SCSIMGR\$|SETVERXX|UNC|XMSXXXX0)($|[\.\/\\])/i;

	  $p = "*CLSID extension", last MATCH if
	    $n =~ m/\{[0-9a-f]{2}/i;

	  $p = "*Dot name (dot bug)", last MATCH if
	    #$n =~ m/\.\s*$/i and $c !~ m/ --b64 /;
	    $n =~ m/[\.\s]$/i and $c !~ m/ --b64 /;
	  $p = "*Dot-blank name (dot bug)", last MATCH if
	    $n =~ m/\.\s/i;
	  $p = "*Multiple blanks in name", last MATCH if
	    $n =~ m/\s\s/i;


	  $e = "";
	  $e = $1 if
	    $n =~ m/\.\s*(\w{1,6})\s*$/i or
	    ( ! $q and ( $n =~ m/^[^=]*\.\s*(\w{1,6})($|[^\w\/\.:])/i or $n =~ m/\.\s*(\w{1,6})($|[^\w\/\.:])/i ) );
	  $e = "" if $e and
	    ($t =~ m/^img/i and $q eq '""' and $n =~ m/cid:\w[\w.-]+\w\@\w[\w.]+\w$/i) or
	    # Careful with href: too many are of type "<a href=http://xyz.com>".
	    ($t =~ m/^href/i and
	      (
	      $n =~ m/^(http:\/\/|https:\/\/|mailto:|Mailto:|ftp:\/\/)[^\\\/]*$/ or
	      $n =~ m/^(http:\\\\)[^\\\/]*$/
	      )
	    );

	  next unless $e;

	  # Add ZIP and RAR, but not TAR or TGZ, to executables...
	  # See also:
	  # Description of how the Attachment Manager works in Microsoft Windows
	  #   http://support.microsoft.com/kb/883260
	  # An overview of unsafe file types in Microsoft products
	  #   http://support.microsoft.com/kb/925330
	  # Understanding Executable Content in Microsoft Products
	  #   http://download.microsoft.com/download/3/7/6/376D0958-BE3E-4D46-82F7-F6C98D84370E/Understanding Executable Content in Microsoft Products.docx
	  # Executable File Types
	  #   http://www.fileinfo.com/filetypes/executable
	  $p = "*Executable extension", last MATCH if
	    $e =~ m/^(ade|adp|ahk|air|ani|apk|app|as|asd|asp|asx|ba_|bas|bat|btm|cer|chm|cmd|cnf|cnt|com|command|cpl|crt|csh|der|dll|do|dot|ear|ebm|ebs|ebs2|elf|eml|es|esh|ex_|exe|fky|folder|frs|fxp|gadget|grp|hlp|hms|hta|ica|inf|ins|inx|ipf|isp|isu|its|jar|jnlp|job|js|jse|jsx|ksh|lnk|lua|ma.|md.|mem|mht|mhtml|mpx|ms.|mshxml|mxe|nws|obs|ocx|ops|osx|otm|paf|pcd|pex|pif|plx|pot|prc|prf|prg|ps1|pst|pvd|pwc|pyc|pyo|qpx|rar|rdp|reg|rgs|rm|scf|scr|sct|sh.|slk|smi|spr|sys|tlb|u3p|udf|uls|upx|url|vb|vb.|vbscript|vpm|vs.|vsmacros|vxd|wal|wcm|widget|wiz|wm.|wpk|wpm|ws|ws.|xl|xlm|xlt|xnk|xqt|zip)$/i;
	  $p = "*WinZip8 bug extension", last MATCH if
	    $e =~ m/^(b64|bhx|hqx|mim|uu|uue|uu.|xxe)$/i;
	  # LHA references:
	  # http://lists.grok.org.uk/pipermail/full-disclosure/2004-April/020702.html
	  # http://lists.grok.org.uk/pipermail/full-disclosure/2004-May/020776.html
	  # http://lists.grok.org.uk/pipermail/full-disclosure/2004-May/020990.html
	  $p = "*WinZip9 bug extension", last MATCH if
	    $e =~ m/^lha$/i;
	  $p = "*Acroread5.1 bug extension", last MATCH if
	    $e =~ m/^xfdf$/i;
	  $p = " Unrecognized extension" if
	    ! ($e =~ m/^(doc|docx|ppt|rtf|xls|xlsx)$/i) and	### BEWARE
	    ! ($e =~ m/^(asc|avi|bib|bmp|css|dat|data|dvi|eps|f90|gif|gz|htm|html|ics|jpeg|jpg|log|m|mpeg|mpg|p7m|p7s|pcx|pdf|png|pps|ps|shtml|sty|tex|tif|tiff|txt|vcf|xml)$/i) and
	  # ! ($t =~ m/^(href|img|form)/i and $e =~ m/^(\d+|asp|aspx|cfm|cgi|dll|e|gsp|gw|jhtml|jsp|php|php3|pl)$/i) and
	    ! ($t =~ m/^(href|img|form)/i) and	### BEWARE: Allow any as references???
	    ! ($usydmail and $from =~ m/^(\w\.)?\w+\@maths\.usyd\.edu\.au$/ and ! $from2);	# Allow funny named files internally
	  if ($e =~ m/^doc$/i and m/\sfilename=".*doc"$/i) {
	    $wordfile = 2;
	    print "wordfile on: $_\n" if $debug;
	    # Quite often we will miss Word files: because the filename line
	    # ends with semicolon, or there are more MIME headers following,
	    # or because the name was encoded in some way.
	    # We only use this to ignore "\WINDOWS" strings (that often
	    # appear as where the file was saved). We know nothing about
	    # NORMAL.DOT files...
	  }
	}
      }

      last MATCH if $_ eq "ssid=\"clsid:38481807-CA0E-42D2-BF39-B33AF135CC4D\" id=ieooui></object>\n" and
	$from eq 'C.Cheen@maths.usyd.edu.au' || $from2 =~ m/C.Cheen\@maths.usyd.edu.au/;
      last MATCH if $from =~ m/\@newsletters.fairfax.com.au/ and
	( m/^\s*var container = document.getElementById\(sContainer\);\s*$/ ||
	  m/^\s*parent.document.getElementById\(sFrame\).style.(width|height) = (width|height|iX|iY) \+ "px";\s*$/ ||
	  m/\sdocument.domain = domain;\s*$/ );

      # Things found in HTML
      $p = "*HTML construct", last MATCH if
	m/type=(3d)?\"?password\"?\s*value=(3d)?[^>]{10}/i or	# Netscape pre-4.76: <form><input type=password value=exploit></form>
	( m/script\s*language[^\w]/i and ! m/<script language=(3d)?"?(javascript(\d\.\d)?|jscript)"?(>| |$)/i and ! m/PostScript\s*Language/i ) or
	m/Language\s*=(3d)?\s*["']?VBS(cript)?/i or
	m/<\s*vbs(cript)?($|[^\w])/i or
	m/classid\s*=/i or
	m/clsid:[0-9a-f]/i or
#	( m/src\s*=(3d)?\s*\"?cid:/i and ! m/img\s*src\s*=(3d)?\s*\"?cid:/i )
	m/i?frame\s(?:.*\s)?src\s*=(3d)?\s*\"?cid:/i or
	m/<xml id=/i	# http://www.microsoft.com/technet/security/advisory/961051.mspx http://isc.sans.edu/forums/diary/5458
	;

      # Things found in PDF, see http://secdiary.com/article/novel-detection-malicious-pdf-javascript/
      $p = "*PDF construct", last MATCH if
	#m/\/FlateDecode/ or
	m/\/JavaScript/
	;

      # Things found in (java,VB)scripts
      # Should we try to match obfuscated (or try to deobfuscate) as in http://secdiary.com/article/novel-detection-malicious-pdf-javascript/ ?
      $p = "*Bad script", last MATCH if
	m/^\s*Execute(\s*=$|(\s+\w+)?\s*\(\s*[\"\w])/i or
	m/expression\s*\(\s*eval\s*\(/i or
	m/unescape\(/i or
	m/String\.fromCharCode/i or
	m/\.charAt\(/i or
	m/\.charCodeAt\(/i or
	m/\.(inner|outer|insertAdjacent|paste|alt)HTML/i or
	m/\.getElementById\s*\(/i or
	m/location\.href/i or
	m/location\.replace/i or
	m/\.cookie/i or
	m/\.clipboardData/i or
	m/\.onkeydown/i or
	m/set(Timeout|Interval)\s*\(/i or
	m/document\.domain/i or
	m/event\.ctrlKey/i or
	m/event\.keycode/i or
	m/window\.createPopup/i or
	( m/window\.open/i and ! m/href='http:\/\/(livemusic|www).moshtix.com.au\/record.asp\?.* onClick="window.open\(this.href,/ ) or
	m/window\.showHelp/i or
	m/self\.close/i or
	m/object\.saveas/i or
	( m/\.shell/i and ! m/\w\@[\w\.]+.shell.com/ ) or
	m/Script\.Run/i or
	m/wsh\.run/i or
	m/Adodb\.Stream/i or
	m/\.XMLHTTP/i or
	m/ActiveXComponent/i or
	m/ActiveXObject/i or
	m/CreateObject/i or
	m/DatagramSocket/i or
	m/GetObject/i or
	m/MSScriptControl/i or
	m/show(Modal|Modeless)Dialog/i or
	m/showHelp\s*\(/i or
	m/TypeLibrary/i or
	m/Scripting\.\w/i or
	m/Scriptlet\.\w/i or
	m/WScript\.\w/i or
	m/iframe\.\w/i or
	m/File\.Close/i or
	m/File\.Open/i or
	m/Request\.Form/i or
	m/Request\.ServerVariables/i or
	m/Response\.Write/i or
	m/FileSystemObject/i or
	m/\.ActiveX\./i or
	m/\.AddressEntries/i or
	m/\.AddressLists/i or
	m/\.Applets\s*[\(\[]/i or
	( m/\.Copy\s*\(/i and ! m/dev\.copy\(postscript/ ) or
	m/\.CopyFile/i or
	m/\.CreateFolder/i or
	m/\.CreateInstance/i or
	m/\.CreateTextFile/i or
	m/\.DeleteFile/i or
	m/\.FileExists/i or
	m/\.FolderExists/i or
	m/\.GetBaseName/i or
	m/\.GetExtensionName/i or
	m/\.GetFile/i or
	m/\.GetFolder/i or
	m/\.GetNameSpace/i or
	m/\.GetSpecialFolder/i or
	( m/\.Language/i and ! m/languages\.arts\.usyd\.edu\.au/ ) or
	m/\.OpenAsTextStream/i or
	m/\.OpenTextFile/i or
	m/\.RegRead/i or
	m/\.RegWrite/i or
	m/\.Run(\s|\()/i or
	m/\.SaveToFile/i or
	m/\.ScriptControl/i or
	m/\.SetCLSID/i or
	m/\.SpecialFolders/i or
	m/\.TypeLib/i or
	m/\.WriteLine/i or
	m/\.WriteLn/i or
	m/\.Write\s*\(/i
	;

      # Things found in exe files (but some also in common, innocent files...)
      $p = "*Binary(?) content", last MATCH if
	m/AddMonitorA/i or
	m/AddPrinterA/i or
	m/AddPrinterDriverA/i or
	m/BeginUpdateResourceA/i or
	m/CallWindowProcA/i or
#	m/CloseHandle/i or
	m/ClosePrinter/i or
	( m/CoCreateInstance/i and ! m/ CoCreateInstanceEx returned / ) or
	m/CoInitialize/i or
#	m/CopyFileA/i or
	m/CreateDirectoryA/i or
	m/CreateEventA/i or
#	m/CreateFileA/i or
	m/CreateFileMapping/i or
	m/CreateMutexA/i or
	m/CreateProcessA/i or
	m/CreateRemoteThread/i or
	m/CreateThread/i or
	m/CreateWindowExA/i or
	m/DefWindowProcA/i or
#	m/DeleteFileA/i or
	m/DeleteMonitorA/i or
	m/DeletePortA/i or
	m/DeletePrinter/i or
	m/DeletePrinterDriverA/i or
	m/DestroyIcon/i or
	m/DestroyWindow/i or
	m/DialogBoxParamA/i or
	m/DisableLowDiskWarning/i or
	m/DispatchMessageA/i or
	m/DriverDesc/i or
	m/EndUpdateResourceA/i or
	m/ExpandEnvironmentStringsA/i or
	m/ExitProcess/i or
	m/ExitThread/i or
	m/FindClose/i or
	m/FindFirstFileA/i or
	m/FindNextFileA/i or
	m/FindResourceA/i or
	m/FlushFileBuffers/i or
	m/FreeEnvironmentStrings/i or
	m/FreeLibrary/i or
	m/FtpCommand[AW]/i or
	m/FtpDeleteFile[AW]/i or
	m/FtpGetFile(A|Ex|W)/i or
	m/FtpOpenFile[AW]/i or
	m/FtpPutFile(A|Ex|W)/i or
	m/FtpRenameFile[AW]/i or
	m/GetACP/i or
	m/GetActiveWindow/i or
	m/GetCPInfo/i or
	m/GetCommandLineA/i or
	m/GetComputerNameA/i or
	m/GetCurrentDirectoryA/i or
	m/GetCurrentProcessId/i or
	m/GetCurrentThread/i or
	m/GetCurrentThreadId/i or
	m/GetDlgItem/i or
	m/GetDriveTypeA/i or
	m/GetEnvironmentStrings/i or
	m/GetFileAttributesA/i or
#	m/GetFileSize/i or
	m/GetFileTime/i or
	m/GetFileType/i or
	m/GetLastActivePopup/i or
	m/GetLastError/i or
	m/GetLogicalDrives/i or
	m/GetMessageA/i or
#	m/GetModuleFileName/i or
#	m/GetModuleHandle/i or
	m/GetOEMCP/i or
	m/GetParent/i or
	m/GetPrivateProfileStringA/i or
#	m/GetProcAddress/i or
	m/GetProfile/i or
	m/GetSpecialFolderPath/i or
	m/GetStartupInfoA/i or
	m/GetStdHandle/i or
	m/GetStockObject/i or
	m/GetSystemDirectory/i or
	m/GetSystemTime/i or
	m/GetTempFileName/i or
	m/GetTempName/i or
	m/GetTempPathA/i or
	m/GetTickCount/i or
	m/GetVersion/i or
	m/GetWindowTextA/i or
	m/GetWindowsDirectory/i or
	m/HeapAlloc/i or
	m/HeapCompact/i or
	m/HeapCreate/i or
	m/HeapDestroy/i or
	m/HeapFree/i or
	m/HideFileExt/i or
	m/HttpAddRequestHeaders[AW]/i or
	m/HttpEndRequest[AW]/i or
	m/HttpOpenRequest[AW]/i or
	m/HttpQueryInfo[AW]/i or
	m/HttpSendRequest(Ex)?[AW]/i or
	m/InternetAttemptConnect/i or
	m/InternetAutodial/i or
	m/InternetCheckConnection[AW]/i or
	m/InternetCloseHandle/i or
	m/InternetConnect[AW]/i or
	m/InternetDial[AW]/i or
	m/InternetGetConnectedState/i or
	m/InternetGetCookie(Ex)?[AW]/i or
	m/InternetGoOnline[AW]/i or
	m/InternetOpenUrl[AW]/i or
	m/InternetOpen[AW]/i or
	m/InternetQueryData/i or
	m/InternetQueryOption[AW]/i or
	m/InternetReadFile/i or
	m/InternetSetCookie(Ex)?[AW]/i or
	m/InternetSetDialState[AW]/i or
	m/InternetSetOption(Ex)?[AW]/i or
	m/InternetWriteFile/i or
	m/LoadCursorA/i or
	m/LoadIconA/i or
	m/LoadLibraryA/i or
	m/LoadLibraryExA/i or
	m/LoadResource/i or
	m/LoadStringA/i or
	m/LocalAlloc/i or
	m/LocalFree/i or
	m/MapViewOfFile/i or
	m/MessageBoxA/i or
	m/MoveFileA/i or
	m/MoveFileExA/i or
	m/OpenPrinterA/i or
	m/OpenProcess/i or
	m/PostMessageA/i or
	m/PostQuitMessage/i or
#	m/RegCloseKey/i or
#	m/RegCreateKey/i or
	m/RegDeleteKeyA/i or
	m/RegDeleteValueA/i or
	m/RegEnumKeyA/i or
	m/RegEnumKeyExA/i or
	m/RegEnumValueA/i or
#	m/RegOpenKey/i or
#	m/RegQueryValue/i or
#	m/RegSetValue/i or
	m/RegisterClassA/i or
	m/RegisterClassExA/i or
	m/RegisterService/i or
	m/ReleaseMutexN/i or
	m/Run_restore_LFN/i or
	m/SendMessageA/i or
#	m/SetEndOfFile/i or
	m/SetFileAttributes/i or
#	m/SetFilePointer/i or
	m/SetFileTime/i or
	m/SetHandleCount/i or
	m/SetInternetPhoneNumber[AW]/i or
	m/SetStdHandle/i or
	m/SetThreadPriority/i or
	m/SetWindowLongA/i or
	m/SetWindowTextA/i or
	m/ShellExecute(A|Ex|W)/i or
	m/Shell_NotifyIcon/i or
	m/SHGetMalloc/i or
	m/ShowSuperHidden/i or
	m/SizeofResource/i or
	m/StringFileInfo/i or
	m/UninstallDir/i or
	m/UninstallPath/i or
	m/UnregisterClassA/i or
	m/UpdateResourceA/i or
	m/VirtualAlloc/i or
	m/VirtualFree/i or
	m/VirtualProtectEx/i or
	m/VirtualQueryEx/i or
	m/WaitForSingleObject/i or
	m/Win32BaseServiceMOD/i or
	m/WinPrint/i or
	m/WriteFile/i or
	m/WritePrivateProfileStringA/i or
	m/WriteProcessMemory/i or
	m/WriteProfileString/i or
	m/outlook\.application/i
	;

      $p = "*Syscall", last MATCH if
	m/   M A P I   /i or
	m/MAPIAddress/i or
	m/MAPIDeleteMail/i or
	m/MAPIDetails/i or
	m/MAPIFindNext/i or
	m/MAPIFreeBuffer/i or
	m/MAPIGetName/i or
	m/MAPILogoff/i or
	m/MAPILogon/i or
	m/MAPIReadMail/i or
	m/MAPIResolveName/i or
	m/MAPISaveMail/i or
	m/MAPISendDocuments/i or
	m/MAPISendMail/i or
#
	m/WNetAddConnection/i or
	m/WNetCancelConnection/i or
	m/WNetCloseEnum/i or
	m/WNetEnumResourceA/i or
	m/WNetOpenEnumA/i or
	m/WSAAsyncGet/i or
	m/WSAAsyncSelect/i or
	m/WSACancel/i or
	m/WSACleanup/i or
	m/WSAFDIsSet/i or
	m/WSAIsBlocking/i or
	m/WSAStartup/i or
	m/WSAUnhook/i
	;

      $p = "*Reserved name", last MATCH if
	( m/\\WIN(DOWS|NT|2K)/i and ! $wordfile and ! m/\\WINDOWS\\temp\\/i and ! m/\w[CD]:\\WINDOWS/ and ! m/\\rome\\home\\Windows\\/ ) or
	m/[\/\\]WIN(DOWS|NT|2K)[\/\\]+SYSTEM(|32)[\/\\]/i or
	m/Start Menu\\+Programs\\+StartUp/i or
#
	m/HKEY_CLASSES_ROOT/i or
	m/HKEY_CURRENT_CONFIG/i or
	( m/HKEY_CURRENT_USER/i and ! m/The resource 'HKEY_CURRENT_USER\\[\w\\ \.]+' does not exist/ ) or
	m/HKEY_DYN_DATA/i or
	m/HKEY_LOCAL_MACHINE/i or
	m/HKEY_PERFORMANCE_DATA/i or
	m/HKEY_USERS/i or
	m/SOFTWARE\\+Microsoft\\+Windows\\+CurrentVersion/i or
	m/Software\\+Microsoft\\+Internet Account Manager/i or
	m/Software\\+Microsoft\\+Internet Explorer/i or
	m/System\\+CurrentControlSet\\+/i or
	m/shell\\+open\\+command/i or
	m/shell\\+open\\+ddeexec/i or
#
	m/\[InternetShortcut\]/i or
	m/\[SearchPaths\]/i
	;

      $p = "*Library", last MATCH if
#	m/ADVAPI32\.dll/i or
	m/COMCTL32\.dll/i or
	m/comdlg32\.dll/i or
	m/GDI32\.dll/i or
#	m/KERNEL32\.dll/i or
	m/MAPI32\.DLL/i or
	m/MPR\.DLL/i or
	m/MSVBVM60\.DLL/i or
	m/MSVCRT20\.dll/i or
	m/OLE2\.DLL/i or
	m/ole32\.dll/i or
	m/riched20\.dll/i or
#	m/SHELL32\.dll/i or
	m/SPOOLSS\.DLL/i or
	m/STORAGE.DLL/i or
#	m/USER32\.dll/i or
	m/wininet\.dll/i or
	m/WINMM\.dll/i or
	m/ws2_32\.dll/i or
	m/WSOCK32\.dll/i or
#
	m/advapi\.dlx/i or
	m/CSRSS\.EXE/i or
	m/FAXDELPR\.LNK/i or
	m/GENFAX\.APD/i or
	m/msdos\.sys/i or
	m/msmail\.ini/i or
	m/MSWINSCK\.OCX/i or
	m/path\.ini/i or
	m/setup\.inf/i or
	m/system\.ini/i or
	m/WAN\.TSP/i or
	m/WinInit\.ini/i or
	m/WINLFN\.INI/i or
	m/WINSPOOL\.DRV/i or
	m/winsvrc\.exe/i or
	m/winsvrc\.vxd/i or
	m/WPSUNI\.DRV/i
	;

      $p = "*(J,VB)Sript.Encode header", last MATCH if
	m/\Q#@~^\E[A-Za-z0-9+\/]{6}==/;

      $p = "*RTF embedded", last MATCH if
	m/\\obj(ect|emb|link|autlink|icemb|ocx|class|data)/;

      $p = "*fake Eudora attachment", last MATCH if
	m/(^|[^#])Attachment Converted: /i;
      $p = "*fake Eudora option/setting", last MATCH if
	m/x-eudora-(option|setting):/i;


      # Concession to specific viruses...
      $p = "*Hybris", last MATCH if
	m/HYBRIS/i;
#      $p = " Nigeria" if
#	m/Nigeria/ and ! m/^Location: Nigeria$/;	# Not a virus, but common and annoying...
      $p = " e-card" if
	m/www\.friend-greeting\.com/i or		# Not a virus, but almost...
	m/www\.friend-greeting\.net/i or
	m/www\.friend-cards\.net/i or
	m/www\.friendgreetings\.com/i or
	m/www\.friend-greetings\.com/i or
	m/www\.friend-greetings\.net/i or
	m/www\.cool-downloads\.net/i
	;


      # RFC2046 (2045-2049, 2017)
      $p = "*Bad content-type", last MATCH if
	m/content-type:\s*message\/(partial|external-body)/i or
	m/content-type:\s*application\/vnd\.adobe\.xfdf/i;

      # Funny content-types. We do not check if it matches the filename...
      $p = " Unrecognized content-type" if
	m/content-type:/i and
	! ( $head and ( m/^DomainKey-Signature:/i or m/^DKIM-Signature:/i or m/^\s/ ) and m/:content-type:/i ) and
	! m/^x400-content-type: /i and
	! m/^Content-Type:\s*TeX File;$/ and
	! m/content-type:\s*application\/(ms-tnef|msexcel|msword|msonenote|rtf|vnd.ms-excel|x-msexcel)($|[^\/\w])/i and	### BEWARE
	! m/content-type:\s*text\/(enriched|richtext)($|[^\/\w])/i and	### BEWARE
	! m/content-type:\s*text($|[^\/\w])/i and
	! m/content-type:\s*text\/(calendar|css|html|plain|rfc822-headers|text|x-tex|x-vcard|xml)($|[^\/\w])/i and
	! m/content-type:\s*image\/(bmp|gif|jpeg|jpg|pjpeg|png|tiff|x-tiff)($|[^\/\w])/i and
	! m/content-type:\s*audio\/(wav)($|[^\/\w])/i and
	! m/content-type:\s*video\/(avi|mpeg|mpg)($|[^\/\w])/i and
	! m/content-type:\s*message\/(delivery-status|news|rfc822)($|[^\/\w])/i and
	! m/content-type:\s*multipart\/(alternative|appledouble|digest|mixed|related|report|signed)($|[^\/\w])/i and
	! m/content-type:\s*application\/(applefile|gzip|jpg|mac-binhex40|octet-stream|pdf|pgp-signature|pkcs7-mime|pkcs7-signature|postscript|text\/plain|x-dvi|x-gzip|x-pdf|x-pkcs7-signature|x-tex|x-www-form-urlencoded)($|[^\/\w])/i
	;

      last MATCH if $_ eq "</xml><![endif]--><!--[if !mso]><object\n" and
	$from eq 'C.Cheen@maths.usyd.edu.au' || $from2 =~ m/C.Cheen\@maths.usyd.edu.au/;
      # Funny HTML
      $p = " Funny HTML" if
	m/<\s*(i?frame|i?layer|object|applet|embed)($|[^\w])/i or
	( m/<\s*script($|[^\w])/i and ! m/<script language=(3d)?"?(javascript(\d\.\d)?|jscript)"?(>| |$)/i ) or
	( ( ( m/(^|[^\w])width=\"?0([^\d]|$)/i  and ! m/<td /i ) or	# Would like [01], but too many ...
	    ( m/(^|[^\w])height=\"?0([^\d]|$)/i and ! m/<(td|table) /i ) ) and
	  ! m/<img src="http:\/\/\w+\.(f2|fairfax)\.com\.au\/event\.ng\/Type=/
	);


      # MIME continuation, RFC2231
      $p = " MIME continuation" if
	m/(?:^|[^\w?\$])(name|filename)\*[\w]*\*?\s*=/i;


#     last MATCH if $p or $c;
      last MATCH if $p;


      # Check for several encoding, quoting schemes
      if (m/^[A-Za-z0-9+\/]{20,}=*$/) {
	# base64
	$c = " --b64 $_$c";
	chomp;
	## Mail corruption? See ticket #2161.
	## Was caused by bug in Wire.pm, now fixed.
	#$p = "*Missing character", $_ = 'Missing character', last MATCH if
	#  m/^[A-Za-z0-9+\/]{60,}$/ and length()%4 == 3 and ! m/^\++$/ and ! m/^(4SM4SM4SM4SM4){5}/;
	$_ = &base64($_);
      }
      elsif (m/^[!-`]{20,}$/) {
	# uuencode
	# Lines cannot be longer than 84 characters (can encode
	# from 0 to 63 chars). Could check that we have a correct
	# leading character:
	#	and substr($_,0,1) eq chr(32 + length($_)*3/4)
	#	and ord($_) == 32 + length($_)*3/4
	# (beware newline is counted). However, unpack can cope with
	# multi-line encoding, even without intervening newlines.
	# Maybe we could check that the leading character does not
	# imply longer than what we have... but what we do if it
	# implies shorter: replace it? assume multi-line and check
	# the next "leading" character? ... so we just do not care.
	$c = " --uu $_$c";
	$_ = unpack("u", $_);
      }
      elsif (m/(%u00|\\u00|%|=)[0-9a-f]{2}/i) {
	# %u00XX: IIS %u (Unicode?) encoding: is it %u or \u? Do we worry about %uXXYY?
	# %XX: URI
	# =XX: MIME
	$c = " --uXX $_$c";
	s/(?:%u00|\\u00|%|=)([0-9a-f]{2})/chr(hex($1))/ige;
	chomp;
      }
      elsif (m/\&(lt|gt|quot|nbsp|amp);/i or m/\&(#0*[0-9]{1,3}([^0-9]|$)|#x0*[0-9a-f]{2}([^0-9a-f]|$))/i) {
	# HTML, cf http://www.w3.org/TR/html4/charset.html
	$c = " --html $_$c";
	s/\&#0*([0-9]{1,3});/chr($1)/ige;
	s/\&#0*([0-9]{1,3})([^0-9]|$)/chr($1).$2/ige;
	s/\&#x0*([0-9a-f]{2});/chr(hex($1))/ige;
	s/\&#x0*([0-9a-f]{2})([^0-9a-f]|$)/chr(hex($1)).$2/ige;
	s/\&lt;/</ig;
	s/\&gt;/>/ig;
	s/\&quot;/\"/ig;
	s/\&nbsp;/ /ig;
	s/\&amp;/\&/ig;
	chomp;
      }
      elsif (m/=\?[\w-]+\?[qb]\?/i) {
	# MIME 'encoded word', RFC2047
	$c = " --cw $_$c";
	s/=\?[\w-]+\?b\?([A-Za-z0-9+\/]{2,})/&base64($1)/ige;
	s/=\?[\w-]+\?[qb]\?//ig;
	s/\?=//ig;
	chomp;
      }
      elsif (m/\+[A-Za-z0-9+\/]{2}/i) {
	# UTF-7, RFC2152
	$c = " --utf7 $_$c";
	s/\+([A-Za-z0-9+\/]{2,})-?/&base64($1)/ige;
	s/\+-/+/ig;
	chomp;
      }
      elsif (m/[\x80-\xff]/) {
	# Do nothing here, but do
	#tr/[\x80-\xff]/[\x00-\x7f]/;
	# below: apparently IE interprets things that way, see
	# http://www.securityfocus.com/archive/1/437948
      }
      # What about UTF-8, RFC2279 ??
      else {
	last MATCH;
      }

      # PC executable: most (but not all!) com cpl dll drv exe ocx scr sys
      $p = "*PC EXE header", $_ = '(PC EXE header)', last MATCH if
	m/^MZ.(\x00|\x01).\x00\x00\x00(\x00|\x02|\x04|\x20)\x00(\x00|\x0f)\x00(\xff\xff|PE).(\x00|\x01)/s;

      $p = "*PC CHM header", $_ = '(PC CHM header)', last MATCH if
	m/^ITSF\x03\x00\x00\x00\x60\x00\x00\x00\x01\x00\x00\x00/;

      $p = "*ZIP header (or StarOffice sxw file)", $_ = '(ZIP header)', last MATCH if
	m/^PK\x03\x04.\x00(\x00|\x01|\x02|\x08)\x00(\x00|\x08)\x00/s;

      $p = "*RAR header", tr/[ -~]/ /c, last MATCH if
	m/^Rar!/s;

      # Word file with macros, or embedded objects
      $p = "*MSWord macro", $_ = 'MSWord Macros', last MATCH if
	m/M\x00a\x00c\x00r\x00o\x00s/;
#      $p = "*MSWord embedded object", $_ = 'MSWord EMBED ', last MATCH if
#	m/EMBED /;
#      $p = "*MSWord (embedded) ObjInfo", $_ = 'MSWord ObjInfo', last MATCH if
#	m/O\x00b\x00j\x00I\x00n\x00f\x00o/;

      $p = "*Corel WP header", $_ = '(WordPerfect header)', last MATCH if
	m/^\xFFWPC/;

      $p = "*MSJet MDB header", $_ = '(MSJet MDB header)', last MATCH if
	m/^....(Jet System DB  |Standard Jet DB|Temp Jet DB    |MSISAM Database)/;

      tr/[\x80-\xff]/[\x00-\x7f]/;
      s/\x00//g;
#      s/[\x00-\x1f]//g;
      tr/[ -~]/ /c;
      if ($debug) { print "Redo $_$c"; print "\n" unless $c =~ m/\n$/ }
      redo MATCH;
    }

    if ($p) {
      $titl = "$ARGV: $from$from2$to $date $subj";
      if ($usydmail) {
	$titl = "$ARGV: (USyd) $from$from2$to $date $subj";
      }
      if ($prevtitl ne $titl) {
	print "- $titl\n";
	$prevtitl = $titl;
      }
      print substr($p,1) . ":\n" if $debug;
      print substr($p,0,1) . " $ARGV:$_$c";
      $wordfile = 0;
    }
    if ($wordfile) {
      $wordfile-- unless $c =~ m/ --b64 /;
      if ($wordfile < 1) {
	$wordfile = 0;
	print "wordfile off: $_\n" if $debug;
      }
    }
}
