#!/usr/bin/perl -w # hwinsmail - Insert email into a HyperWave server # Maintained by xanni@xanadu.net (Andrew Pam) # Copyright (c) 1997-98 Xanadu Australia # # Date Who Description # ---------- --- ----------------------- # 02/03/1997 ADP Created initial version # 03/03/1997 ADP Added checksum and wordcase # 04/03/1997 ADP Added message testing and insertion # 07/03/1997 ADP Added TimeCreated and Keyword headers, HTML support # 08/03/1997 ADP Added setext and MIME quoted-printable support, etc. # 09/03/1997 ADP Added replace mode # 10/03/1997 ADP Fixed signatures inside blockquotes # 13/03/1997 ADP Now adds new documents even in replace mode # 28/07/1997 ADP Fixed Keyword header creation and date processing # 16/08/1997 ADP Added References header support, better error reporting # Added support for MIME quoted-printable ISO-8859-1 charset # 27/10/1998 ADP Fixed warnings, added support for file-per-message format # 02/11/1998 ADP Fixed some minor bugs with quoted-printable and with quotes # # TODO: # Read configuration file # Support copying (existing) messages into multiple collections # Set DocAuthor attribute to the original email author # Add support for Usenet news headers # Use Hyper-G protocol for speed # ---------- Configuration ---------- # PATH $ENV{PATH} = "/bin:/usr/bin:/usr/local/bin:/home/hwsystem/bin/LINUX_ELF"; # Temporary directory $tmpdir = "/tmp/"; # HyperWave username and password $ident = "-ident 'hwmail XXXXXX'"; # HyperWave rights #$rights = "-rights 'R:g xxxxxxxx'"; $rights = ""; # Collection to insert into $parent = "hwmail/xanadu"; # Use mbox format (look for "From " lines) $mbox = 0; # Replace existing documents $replace = 0; # ----------------------------------- # Date::Format and Date::Parse are part of the TimeDate module available at # http://www.perl.org/CPAN/CPAN.html use Date::Format; use Date::Parse; # checksum: Generate a simple checksum sub checksum { # Sizeof(long) my $LONG = 4; my $string = join("", @_); my $length = length $string; $string .= "\0" x $LONG; my $checksum = 0; my $i = 0; for ($i = 0; $i < $length; $i += $LONG) { $checksum += unpack("L", substr($string, $i, $LONG)); } return $checksum; } # formatheader: HTML format header sub formatheader { return "$_[0]" . "$_[1]\n"; } # hyperlink: Generate HTML link sub hyperlink { $_ = shift; s/&/&/g; s/),"\&\s]*[^<>),."\&\s])&$1$2&g; s&(mailto|news|telnet)(:[^<>)\],"\s]+)&$1$2&g; s&(^|\s)(ftp|gopher)(\.[\w./-]+)(\s|$)&$1$2$3$4&g; s&(^|\s)(www\.[\w./-]+)(\s|$)&$1$2$3&g; s&(article|In-Reply-To:|Message-ID:|References:)(\s+\<)([^@>"\s]+@[^>"\s]*)&$1$2$3&gi; s/(^|^"|[^=]"|SMTP:|[()\[\]\s]|(<)+)([^&()>":\[\]\s]+@)([\w\-.]+\.[\w\-]+|\[\d+\.\d+\.\d+\.\d+\])/$1$3$4<\/A>/g; return $_; } # wordcase: Capitalise first letter of each word sub wordcase { my @words = split(/\b/, shift); foreach (@words) { $_ = "\u\L$_"; } return join("", @words); } # Check that parent collection exists! die "Collection $parent not found.\n" unless hex `hwinfo $ident -object $parent -ids 2>/dev/null`; if ($mbox) { # Skip to start of first message while (defined($_ = <>) and !/^From /) {}; die "No valid messages found.\n" unless $_; } # Process messages while (<>) { # Get headers if (/^([^:\s]+):\s*(.*)$/) { $header = wordcase($1); $header{$header} = $2; } # Handle continuations elsif (/^\s+(.+)$/) { $header{$header} .= " $1"; } # First blank line ends headers elsif (/^$/) { # Fix any missing headers $header{"From"} = $header{"Apparently-From"} unless $header{"From"}; $header{"From"} = $header{"Reply-To"} unless $header{"From"}; $header{"From"} = "anonymous" unless $header{"From"}; $header{"Subject"} = "(no subject)" unless $header{"Subject"}; if (!$header{"Message-Id"}) { if ($header{"Resent-Message-Id"}) { $header{"Message-Id"} = $header{"Resent-Message-Id"}; } else { $header{"Message-Id"} = "<" . checksum(%header) . "\@" . substr(`uname -n`, 0, -1) . ">"; } } # DEBUG # foreach (keys %header) # { print "$_: $header{$_}\n"; } # Check if message already exists ($MID) = $header{"Message-Id"} =~ /<(.*)>/; if (!hex `hwinfo $ident -object $MID -ids 2>/dev/null`) { $mode = "-parent $parent -name"; } elsif ($replace) { $mode = "-replace"; } else { $mode = ""; print "<$MID> already exists, skipping\n"; $_ = <> until eof or ($mbox and /^From /); } if ($mode) # Insert message { open(MESSAGE, ">${tmpdir}message") or die; select MESSAGE; ($title = $header{"Subject"}) =~ tr/<>//d; print "$title\n"; ($keyword = $header{"From"}) =~ tr/<>"/ /s; print "\n"; print ""; print formatheader("Date:", $header{'Date'}) if $header{"Date"}; print formatheader("From:", hyperlink $header{"From"}); print formatheader("Organization:", hyperlink $header{'Organization'}) if $header{"Organization"}; print formatheader("Reply-To:", hyperlink $header{"Reply-To"}) if $header{"Reply-To"}; print formatheader("Subject:", hyperlink $header{"Subject"}); if ($header{"In-Reply-To"}) { hyperlink "In-Reply-To: $header{'In-Reply-To'}"; s/^In-Reply-To: //; print formatheader("In-Reply-To:", $_); } if ($header{"References"}) { $references = ""; foreach $ref (split /\s+/, $header{"References"}) { hyperlink "References: $ref"; s/^References: //; $references .= "$_ "; } print formatheader("References:", $references); } print "

\n"; $quote = $sig = 0; while (defined($_ = <>) and !eof and (!$mbox or !/^From /)) { s/^>From /From / if $mbox; # Unprotect "From " if (/[^=][^=]=$/) { chomp; chop; $_ .= <>; } # Handle MIME continuations s/=20(\s|$)/ /g; # Specialcase encoded spaces hyperlink $_; s/=(\w\w)(\s|$)/"&#".hex($1).";"/gie; # Convert quoted-printable if (s/^([>:]\s*)+(.*)$/$2/) # Handle quoted text { print "

\n" unless $quote; $quote = 1; } elsif ($quote) { print "" if $sig; print "
\n"; $quote = $sig = 0; } s/^\s*$/

/; # Mark paragraph breaks s/(^|\s)\*\*([^*\s])/$1$2/g; # setext start strong s/([^*\s])\*\*(\s|$)/$1<\/STRONG>$2/g; # setext end strong s/(^|\s)_(\w[^_]*\w)_(\W|$)/$1$2<\/EM>$3/g; # setext emphasis s/(^|\s)\*(\w[^*]*\w)\*(\W|$)/$1$2<\/B>$3/g; # Handle bold $_ = "
$_
" if /^\s*([-=_*.] ?){3,}$/; $_ = "
$_" if /^\s*\*\s+/; # Handle bullets if (/^--\s*$/) # Handle signatures { $sig = 1; $_ = "

$_"; }
        elsif (/\cI/)				# Tabs imply preformatted
        { $_ = "
$_
\n"; } print; } print "" if $quote; print "
\n" if $sig; print "\n"; select STDOUT; close MESSAGE; $time = $header{"Date"} ? str2time($header{"Date"}) : time; $cdate = time2str("%Y/%m/%e %T", $time, "UTC"); print STDERR "Inserting <$MID>: "; system("hwinsdoc $ident -cdate '$cdate' $rights -mime text/html" . " $mode $MID -path ${tmpdir}message 2>${tmpdir}hwinsdoc-errors"); # " $mode $MID -path ${tmpdir}message 2>/dev/null"); # " $mode $MID -path ${tmpdir}message"); print STDERR $? ? "Error:\n" : "Success.\n"; if ($?) { open(TEMP, "<${tmpdir}hwinsdoc-errors"); undef $/; # Process entire file print STDERR ; close(TEMP); } } undef %header; # last unless $_; last if eof(); } # undef %header if eof or ($mbox and /^From /); } unlink "${tmpdir}hwinsdoc-errors"; unlink "${tmpdir}message";