#!/usr/bin/perl -w # # saf_http_out for Safari # Copyright (c) 1999 by Barrie Slaymaker, rbs@telerama.com # # You may distribute under the terms of either the GNU General Public # License or the Artistic License, as specified in the README file. # =head1 NAME saf_http_out - Converts an input file in to an http file, safari style =head1 SYNOPSIS saf_http_out --source-file "co/out.c" \ --header "Content-Type: text/html" [file ...] --outfile=http/out.c saf_http_out --a "--header=Content-Type: text/html" [file ...] -o=out.http saf_http_out --header "Content-Type: text/html" =head1 DESCRIPTION This script prepends the headers given on the command line to it's input file passed. New headers can go before or after existing headers. The source files are usually raw test, HTML, XML, or media files, and the result file (or STDOUT if no outfile is specified), should be suitable for returning from a cgi-bin application. This is a standard unix style filter program, so input can come from the standard input (stdin) or from filenames specified on the command line. Also, all output is sent to the standard output unless the '--outfile=file' option is given. Headers are emitted in command line order, optionally before or after existing headers. Options may be added to allow replacing some or all header lines in the future. Unless '--before-existing' or '--after-existing' are specified, the input file is assumed to contain no HTTP headers. If they are specified, all lines up to the first blank line are assumed to be headers. Catenates files together if more than one is provided. HTTP headers that happen to appear in second and later files are not given any special treatment: they are treated just like a data file. Only HTTP headers in the first file are seen, and then only if an appropriate command line option has been given. It is considered an error for 0 bytes to have been read in (after any input headers), in which case the result file is deleted. =head2 OPTIONS =over =item --after-existing, -a Specifies that the input stream may contain HTTP headers and does contain a blank line after those headers (whether or not they exist). Headers on the command line will be inserted between the existing headers and the blank line. =item --before-existing, -b Specifies that the input stream may contain HTTP headers and does contain a blank line after those headers (whether or not they exist). Headers on the command line are output first, then the =item --header, -h --header and -h specify a header line to be emmitted. A "\r\n" will be emitted after every header line. =item --file-type Specifies the file type ('text', 'c', 'perl', 'html', etc) for the input file. Supresses all internal checks for file type. You may want to use --outfile-type to force the input file to be output as plain text. This is no longer needed for it's original purpose (--PRE supercedes it), but is left here in case of trouble. Far better to extend the File::Type module if the file type you need is standard enough that other people would benefit from the extension. The most common use for this is to prevent HTML-like text from being identified as HTML: if you're serving something that really is text, but that might look enough like HTML that File::Type could be fooled, use --file-type=text to prevent such foolishness. =item --outfile, -o Specifies the output filename. This may be preferable to redirecting standard output (stdout) since the file will be deleted if errors are encountered. If the directory containing the outfile does not exist, it (and any others down to it) are created with permissions 0775 (minus those masked out by umask). =item --PRE If the file has a mime type ("Content-type:") of text (eq text/plain text/html, text/sgml, etc), it will be converted to a title-less HTML page with a text/html mime type by wrapping it in a EPREE block. This allows text that a browser would otherwise try to interpret to be viewed as plain text. Characters such as '<' are encoded to be entities like '<'. This is the standard behavior for files of type 'text/plain', so specifying --PRE in addition to --file-type=text is not necessary. =item --source-file, -s Specifies the original file in the event that the input to saf_http_out has been processed (usually by a something-to-HTML converter). This allows saf_http_out to determine the original file type and mime type when deciding what filters and tools are relevant. Note that for some converters, you probably want to look at the file type of the resulting file. This is especially true for things that get inlined in HTML pages like images: the source file might be something like an xfig file, but you don't want Safari thinking that the resulting file should be dressed up like a text file. =item Header macros Some commonly used strings within headers have abbreviations, called macros. These look like ':text:, where 'text' is the name of the macro. 'text' must be comprised of the characters a-z, A-Z, 0-9, and '-'. =over =item :c-t: Specifying -h ':c-t: ' emits 'Content-type: text/plain' =item :html: Content-type: text/html =item :loc: Abbreviation for "Location:" =back =back =AUTHOR Barrie Slaymaker =cut # # Each input file specified (or stdin) is opened and read manually in order # to trap I/O errors. # use strict ; use Getopt::Long ; use File::Basename ; use File::Type qw( get_type type_2_mime ) ; use File::Path ; use HTML::Entities qw( encode_entities ) ; use vars qw( @option_specs ) ; my $progname = basename( $0 ) ; my $edit_modules = {} ; # # @options is often modified by the config file or by modules it uses, since # there are options that may control the editing of the files beign served. # @option_specs = ( 'after-existing|a', 'before-existing|b', # 'expires|e=i', 'file-name|filename=s', 'file-type=s', 'header|h=s@', 'mime-type=s', 'edit!', 'outfile|o=s', 'PRE', 'source-file|s=s', 'source-file-type=s', 'source-mime-type=s', 'title=s', ) ; my $options = {} ; $options->{'edit'} = 1 ; die "No configuration directory set (\$ENV{SAF_CONF_DIR} not defined)" unless defined( $ENV{SAF_CONF_DIR} ) && length( $ENV{SAF_CONF_DIR} ) ; my $config_file = "$ENV{SAF_CONF_DIR}/$progname.conf" ; if ( -f $config_file ) { my $result = do $config_file ; die "$@ in $config_file" if $@ ; die "$!: $config_file" unless defined $result ; die "$config_file returned undef" unless $result ; } GetOptions( $options, @option_specs ) ; my $dest = '' ; my $file_name = $options->{'file-name'} ; $file_name = $ARGV[0] unless defined $file_name ; $file_name = $options->{'outfile'} unless defined $file_name ; if ( $options->{'outfile'} ) { $dest = $options->{'outfile'} ; my ( undef, $dest_dir ) = fileparse( $dest ) ; $dest_dir =~ s@/+$@@ ; mkpath( $dest_dir, 0, 0775 ) unless -d $dest_dir ; die "unable to create '$dest_dir' to hold '$dest'" unless -d $dest_dir ; open( STDOUT, ">$dest" ) or die "$! opening $dest" ; my $cleanup_name = $dest ; $SIG{__DIE__} = sub { print( STDERR "Removing '$cleanup_name'\n" ) ; close( STDOUT ) ; unlink( $cleanup_name ) or die "$! unlinking '$cleanup_name'" ; } ; $dest = " $dest" ; } my %abbrevs = ( ':c-t:' => 'Content-type:', ':html:' => 'Content-type: text/html', ':loc:' => 'Location:', ':plain:' => 'Content-type: text/plain', ) ; map{ s{(:[a-zA-Z0-9-]+:)}{ die "Unrecognized abbreviation: $1" unless defined $abbrevs{$1} ; $abbrevs{$1} ; }ge ; } @{$options->{'header'}} ; binmode( STDOUT ) ; # # If we are to place our headers before any existing headers, then # we don't need to insert a blank line: there's one already after the # existing headers. # my $blank_line = defined( $options->{'before-existing'} ) ? '' : "\r\n" ; open_first() ; my @headers ; if ( $options->{'after-existing'} ) { while ( defined( $_ = get_chunk() ) ) { last if /^\r?\n/ ; push( @headers, $_ ) ; } $blank_line = $_ ; } if ( defined $options->{'header'} ) { push( @headers, @{$options->{'header'}} ) ; } # # Do the rest of the input in bulk. Not a good idea for # huge files, but it's good enough for now. # $/ = undef ; my $contents ; my $read_something = defined( $contents = get_chunk() ) ; close_last() ; die "Content is 0 length" unless $read_something ; my $file_type = defined( $options->{'file-type'} ) ? $options->{'file-type'} : get_type( { follow => 1 }, undef, $contents ) ; my $mime_type = defined( $options->{'mime-type'} ) ? $options->{'mime-type'} : type_2_mime( $file_type ) ; my $source_file_type = defined( $options->{'source-file-type'} ) ? $options->{'source-file-type'} : defined( $options->{'source-file'} ) ? get_type( { follow => 1 }, $options->{'source-file'} ) : $file_type ; my $source_mime_type = defined( $options->{'source-mime-type'} ) ? $options->{'source-mime-type'} : type_2_mime( $source_file_type ) ; if ( $mime_type eq 'text/plain' || $options->{'PRE'} && substr( $mime_type, 0, 5 ) eq 'text/' ) { encode_entities( $contents ) ; my $title = defined( $options->{'title'} ) ? $options->{'title'} : defined( $ENV{SAF_FILE} ) ? $ENV{SAF_FILE} : defined( $options->{'source-file'} ) ? $options->{'source-file'} : $file_name ; $title = '' unless defined( $title ) ; $contents = qq{ $title
$contents
}; $mime_type = 'text/html' ; } # # Store file types where the edit routine can find 'em # $options->{'file-type'} = $file_type ; $options->{'mime-type'} = $mime_type ; $options->{'source-file-type'} = $source_file_type ; $options->{'source-mime-type'} = $source_mime_type ; # # Add some headers # my %seen_headers ; map{ my ( $header_name ) = m/^(\S+):/ ; $seen_headers{lc($header_name)} = 1 ; } @headers ; # # We treat the last-modified time as the time of the supplied # file names or the current time. We should probably _not_ use # the output filename here, but that's the way $file_name works # for right now, unless an input filename was specified. # unless ( exists $seen_headers{'last-modified'} ) { my $mtime ; if ( defined $file_name && -e $file_name ) { $mtime = ( stat( _ ) )[ 9 ] ; } else { $mtime = time ; } push( @headers, 'Last-Modified: ' . format_GMT( $mtime ) ) ; } # # Need to do something with expires, not sure what, yet. Probably want # to allow a delta expires tag that cgimake can fix up to be absolute. # that way we tell the remote cache to recheck every so often. # #unless ( exists $seen_headers{'expires'} ) { # # # # Default to 1 hour # # # my $expires = $options->{'expires'} ; # $expires = 60 * 60 # unless defined $expires ; # # push( @headers, 'Expires: ' . format_GMT( time + $expires ) ) ; #} # # # If the config file defined an editing routine, call it. # edit( $options, \@headers, \$contents ) if ( defined &edit && $options->{'edit'}) ; push( @headers, "Content-type: $options->{'mime-type'}" ) if ! exists $seen_headers{'content-type'} && defined( $mime_type ) && length( $mime_type ) ; # # We do content-length last: there's no need to pass it to the # edit routines, since they can call length(), and we like to # allow the edit routines to not have to create or update it. # push( @headers, 'Content-Length: ' . length( $contents ) ) unless exists $seen_headers{'content-length'} ; print( join( "\r\n", @headers, '' ), $blank_line, $contents ) or die "$! writing$dest" ; exit 0 ; ######################################################################### sub open_first { if ( @ARGV ) { open_next() ; } else { open( IN, "<&STDIN" ) ; } } sub open_next { $ARGV = shift @ARGV ; open( IN, "<$ARGV" ) or die "$! '$ARGV'" ; } sub get_chunk { if ( eof( IN ) ) { close( IN ) ; return undef unless @ARGV ; open_next() ; } my $next = ; die "$!: $ARGV" unless defined $next ; return $next ; } sub close_last { close( IN ) ; } sub format_GMT { my @time_fields = gmtime( shift ) ; my @months = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov ) ; my @days = qw( Sun Mon Tue Wed Thu Fri Sat Sun ) ; return sprintf( "%s, %d %s %d %02d:%02d:%02d GMT", $days[$time_fields[6]], $time_fields[ 3 ], $months[$time_fields[ 4 ]], $time_fields[ 5 ] + 1900, $time_fields[ 2 ], $time_fields[ 1 ], $time_fields[ 0 ] ) }