package StatusPage; use strict; use warnings; require Exporter; our @ISA = qw(Exporter); our @EXPORT = qw(); our @EXPORT_OK = qw( arg_checks arg_end parse_uploaded opt_uploaded parse_db opt_db parse_db_or_uploaded opt_db_or_uploaded parse_choice opt_choice parse_encoded opt_encoded parse_integer opt_integer parse_number opt_number parse_evalue opt_evalue parse_safe opt_safe ); use Carp qw(croak); use Cwd qw(getcwd); use Encode qw(decode); use Fcntl qw(O_APPEND O_CREAT O_WRONLY O_TRUNC); use File::Basename qw(fileparse); use File::Spec::Functions qw(catfile splitdir splitpath tmpdir); use HTML::Template; use Sys::Hostname; use Time::HiRes; use lib qw(/mit/meme_v4.11.4/lib/perl); use Alphabet qw(dna rna protein); use ExecUtils qw(invoke stringify_args); use Globals; # Setup logging my $logger = undef; eval { require Log::Log4perl; Log::Log4perl->import(); }; unless ($@) { Log::Log4perl::init('/mit/meme_v4.11.4/etc/logging.conf'); $logger = Log::Log4perl->get_logger('meme.service.utils'); } my $tmp_dir = ''; $tmp_dir = &tmpdir() if ($tmp_dir eq '' || $tmp_dir =~ m/^\@TMP[_]DIR\@$/); # Checks that a file name has only whitelisted characters in it and does # not have a leading dash sub is_safe_name { $logger->trace('call is_safe_name') if $logger; my ($name) = @_; if ($name =~ /^[a-zA-Z0-9_\.\-]+$/ && $name !~ /^-/ && $name ne '.' && $name ne '..') { return 1; } return 0; } sub _no_up_dirs { my ($path) = @_; my ($vol, $dir_path, $file_name) = splitpath($path); my @dirs = splitdir($dir_path); return 0 == grep { $_ eq '..' || $_ eq '.' } @dirs; } sub _prepend_link_name { my ($path, $link_name) = @_; my ($vol, $dir_path, $file_name) = splitpath($path); my @dirs = splitdir($dir_path); return catfile($link_name, @dirs, $file_name); } sub find_in_dir { my ($dir, $pattern, $link_name) = @_; # record the current directory so we can return to it my $working_dir = getcwd(); # change to the specified directory so we can use the glob command chdir($dir); # use the glob command to find the files my @files = glob($pattern); # eliminate any files that are above the given directory @files = grep { _no_up_dirs($_) } @files; # change the working directory back to the original value chdir($working_dir); # return the files (relative to the directory given) if (defined($link_name)) { @files = map { _prepend_link_name($_, $link_name) } @files; } return @files; } # # arg_checks # Check untagged arguments. # sub arg_checks { my @arg_fns = @_; my $index = 0; my $check_fn = sub { my ($arg) = @_; my $fn = ($index < scalar(@arg_fns) ? $arg_fns[$index] : $arg_fns[-1]); $fn->($index, $arg); $index += 1; } } # # arg_end # Tag the ending of the arguments. # sub arg_end { my $check_fn = sub { my ($opt, $arg) = @_; my $desc = &_opt_desc($opt); die("Value \"$arg\" ($desc) is surplus to requirements\n"); }; return $check_fn; } # # parse_uploaded # Parses an uploaded file to ensure it exists and fits the allowed naming scheme # sub parse_uploaded { my ($opt, $filepath, $out_ref, $validate_fn) = @_; my $desc = &_opt_desc($opt); # uploaded files are dropped directly in the working directory so we remove all path my $filename = fileparse($filepath); if (not &is_safe_name($filename)) { die("Value \"$filename\" invalid for $desc (does not fit allowed file name pattern)\n"); } elsif (not -e $filename) { die("Value \"$filename\" invalid for $desc (file does not exist)\n"); } if (defined($validate_fn)) { my $err = $validate_fn->($filename); die("Value \"$filename\" invalid for $desc ($err)\n") if ($err); } if (ref($out_ref) eq 'ARRAY') { push(@{$out_ref}, $filename); } else { $$out_ref = $filename; } } # # opt_uploaded # Returns a checker for use with Getopt::Long GetOptions method. # Checker ensures that the file exists and does not contain odd characters # that could mess with command line construction. # sub opt_uploaded { my ($out_ref, $validate_fn) = @_; my $check_fn = sub { my ($opt, $filepath) = @_; &parse_uploaded($opt, $filepath, $out_ref, $validate_fn); }; return $check_fn; } # # parse_uploaded # Parses an database name to ensure it exists. # sub parse_db { my ($opt, $file_pattern, $out_ref, $db_dir, $db_link) = @_; my $desc = &_opt_desc($opt); my @files = &find_in_dir($db_dir, $file_pattern, $db_link); unless (@files) { die("Value \"$file_pattern\" invalid for $desc (does not match any files)\n"); } if (ref($out_ref) eq 'ARRAY') { push(@{$out_ref}, @files); } else { if (scalar(@files) > 1) { die("Value \"$file_pattern\" invalid for $desc (matches multiple files when only one was expected)\n"); } $$out_ref = $files[0]; } } # # opt_db # Returns a checker for use with Getopt::Long GetOptions method. # Checker ensures that the file exists within the db directory. # sub opt_db { my ($out_ref, $db_dir, $db_link) = @_; my $check_fn = sub { my ($opt, $file_pattern) = @_; &parse_db($opt, $file_pattern, $out_ref, $db_dir, $db_link); }; return $check_fn; } # # parse_db_or_uploaded # Parses a file name pattern that could be a db. # Looks for 'db/' to indicate that it is a database but # otherwise requires that it is a file in the current directory. # sub parse_db_or_uploaded { my ($opt, $file_pattern, $out_ref, $db_dir, $db_link) = @_; my $desc = &_opt_desc($opt); my @files; if ($file_pattern =~ m/^db\//) { @files = &find_in_dir($db_dir, substr($file_pattern, 3), $db_link); unless (@files) { die("Value \"$file_pattern\" invalid for $desc (does not match any files)\n"); } } else { my $filename = fileparse($file_pattern); if (not &is_safe_name($filename)) { die("Value \"$filename\" invalid for $desc (does not fit allowed file name pattern)\n"); } elsif (not -e $filename) { die("Value \"$filename\" invalid for $desc (file does not exist)\n"); } @files = ($filename); } if (ref($out_ref) eq 'ARRAY') { push(@{$out_ref}, @files); } else { if (scalar(@files) > 1) { die("Value \"$file_pattern\" invalid for $desc (matches multiple files when only one was expected)\n"); } $$out_ref = $files[0]; } } # # opt_db_or_uploaded # Returns a checker for use with Getopt::Long GetOptions method. # Checker ensures that the file exists within the db directory. # sub opt_db_or_uploaded { my ($out_ref, $db_dir, $db_link) = @_; my $check_fn = sub { my ($opt, $file_pattern) = @_; &parse_db_or_uploaded($opt, $file_pattern, $out_ref, $db_dir, $db_link); }; return $check_fn; } # # parse_safe # Parses a value and checks that it should be fine to use on a command line. # sub parse_safe { my ($opt, $value, $out_ref) = @_; my $desc = &_opt_desc($opt); if (not &is_safe_name($value)) { die("Value \"$value\" invalid for $desc (does not fit allowed command-line safe pattern)\n"); } if (ref($out_ref) eq 'ARRAY') { push(@{$out_ref}, $value); } else { $$out_ref = $value; } } # # opt_safe # Returns a checker for use with Getopt::Long GetOptions method. # Checker ensures that the text is a safe name. # sub opt_safe { my ($out_ref) = @_; my $check_fn = sub { my ($opt, $value) = @_; &parse_safe($opt, $value, $out_ref); }; return $check_fn; } # # parse_choice # Parses a value and ensures it is one of the allowed choices. # sub parse_choice { my ($opt, $value, $out_ref, @choices) = @_; my $desc = &_opt_desc($opt); foreach my $choice (@choices) { if ($value eq $choice) { if (ref($out_ref) eq 'ARRAY') { push(@{$out_ref}, $choice); } else { $$out_ref = $choice; } return; } } my $options = (@choices > 1 ? join(', ', @choices[0..-1]) . ' or ' : '') . $choices[-1]; die("Value \"$value\" invalid for $desc ($options expected)\n"); } # # opt_choice # Returns a checker for use with Getopt::Long GetOptions method. # Checker ensures that the text is one of the choices # sub opt_choice { my ($out_ref, @choices) = @_; my $check_fn = sub { my ($opt, $value) = @_; &parse_choice($opt, $value, $out_ref, @choices); }; return $check_fn; } sub parse_encoded { my ($opt, $value, $out_ref) = @_; my $desc = &_opt_desc($opt); # copy the value my $temp_value = $value; # decode modified URL encoding ('_' instead of '%') to UTF-8 bytes $temp_value =~ s/_([0-9A-Fa-f]{2})/chr(hex($1))/eg; # decode UTF-8 bytes to internal Perl string my $decoded_value = decode("utf8", $temp_value); # set the output if (ref($out_ref) eq 'ARRAY') { push(@{$out_ref}, $decoded_value); } else { $$out_ref = $decoded_value; } } # # opt_encoded # Returns a checker for use with Getopt::Long GetOptions method. # Checker ensures that the text is one of the choices # sub opt_encoded { my ($out_ref) = @_; my $check_fn = sub { my ($opt, $value) = @_; &parse_encoded($opt, $value, $out_ref); }; return $check_fn; } # # parse_integer # Parses an integer, ensures that the integer x is min <= x <= max # sub parse_integer { my ($opt, $integer, $out_ref, $min, $max) = @_; my $desc = &_opt_desc($opt); if (defined($min) && $integer < $min) { die("Value $integer invalid for $desc (value >= $min expected)\n"); } elsif (defined($max) && $integer > $max) { die("Value $integer invalid for $desc (value <= $max expected)\n"); } if (ref($out_ref) eq 'ARRAY') { push(@{$out_ref}, $integer); } else { $$out_ref = $integer; } } # # opt_integer # Returns a checker for use with Getopt::Long GetOptions method. # Checker ensures that the integer x is min <= x <= max # sub opt_integer { my ($out_ref, $min, $max) = @_; my $check_fn = sub { my ($opt, $integer) = @_; &parse_integer($opt, $integer, $out_ref, $min, $max); }; return $check_fn; } # # parse_number # Ensures that the number passes all the constraints specified # as operator + value pairs. # sub parse_number { my ($opt, $number, $out_ref, @op_vals) = @_; my $desc = &_opt_desc($opt); for (my $i = 0; ($i + 1) < scalar(@op_vals); $i += 2) { my $op = $op_vals[$i]; my $val = $op_vals[$i + 1]; my $test = 0; if ($op eq '<') { $test = $number < $val; } elsif ($op eq '<=') { $test = $number <= $val; } elsif ($op eq '==') { $test = $number == $val; } elsif ($op eq '>=') { $test = $number >= $val; } elsif ($op eq '>') { $test = $number > $val; } else { die("Value $number for $desc could not be tested due to unknown operator $op\n"); } die("Value $number invalid for $desc (value $op $val expected)\n") unless $test; } if (ref($out_ref) eq 'ARRAY') { push(@{$out_ref}, $number); } else { $$out_ref = $number; } } # # opt_number # Returns a checker for use with Getopt::Long GetOptions method. # Checker ensures that the number passes all the constraints specified # as operator + value pairs. # sub opt_number { my ($out_ref, @op_vals) = @_; my $check_fn = sub { my ($opt, $number) = @_; &parse_number($opt, $number, $out_ref, @op_vals); }; return $check_fn; } # # parse_evalue # Ensures that the evalue is positive. # sub parse_evalue { my ($opt, $evalue, $out_ref) = @_; my $desc = &_opt_desc($opt); if ($evalue <= 0) { die("Value $evalue invalid for $desc (not a valid e-value)\n"); } if (ref($out_ref) eq 'ARRAY') { push(@{$out_ref}, $evalue); } else { $$out_ref = $evalue; } } # # opt_evalue # Returns a checker for use with Getopt::Long GetOptions method. # Checker ensures that the evalue is positive. # sub opt_evalue { my ($out_ref) = @_; my $check_fn = sub { my ($opt, $evalue) = @_; &parse_evalue($opt, $evalue, $out_ref); }; return $check_fn; } sub new { $logger->trace('call new StatusPage') if $logger; my $classname = shift; my $self = {}; bless($self, $classname); $self->_init(@_); return $self; } sub _init { $logger->trace('call StatusPage::_init') if $logger; my $self = shift; my ($program, $argv, %opts) = @_; my @argv_copy = @{$argv}; $self->{program} = $program; $self->{argv} = \@argv_copy; $self->{when} = [&Time::HiRes::gettimeofday()]; $self->{page} = (defined($opts{PAGE}) ? $opts{PAGE} : 'index.html'); $self->{log} = (defined($opts{LOG}) ? $opts{LOG} : lc($program) . '-log'); $self->{output} = (defined($opts{OUTPUT}) ? $opts{OUTPUT} : 'messages.txt'); $self->{status} = ''; $self->{files} = []; $self->{file_keys} = {}; $self->{messages} = []; $self->{cleanup} = sub {1;}; } sub set_cleanup { my $self = shift; my ($cleanup_fn) = @_; $self->{cleanup} = $cleanup_fn; } sub add_file { $logger->trace('call StatusPage::add_file') if $logger; my $self = shift; my ($key, $file, $desc, %opts) = @_; croak("Duplicate file key \"$key\"") if $self->{file_keys}->{$key}; my $entry = {key => $key, file => $file, desc => $desc}; my $target_key = $opts{BEFORE} || $opts{AFTER}; if ($target_key) { my $target_i; for ($target_i = 0; $target_i < scalar(@{$self->{files}}); $target_i++) { last if ($self->{files}->[$target_i]->{key} eq $target_key); } splice(@{$self->{files}}, ($opts{BEFORE} ? $target_i : $target_i + 1), 0, $entry); } elsif ($opts{INDEX}) { splice(@{$self->{files}}, $opts{INDEX}, 0, $entry); } else { push(@{$self->{files}}, $entry); } $self->{file_keys}->{$key} = 1; } sub update_file { $logger->trace('call StatusPage::update_file') if $logger; my $self = shift; my ($key, %values) = @_; croak("File key \"$key\" does not exist") unless $self->{file_keys}->{$key}; for (my $i = 0; $i < scalar(@{$self->{files}}); $i++) { if ($self->{files}->[$i]->{key} eq $key) { my $entry = $self->{files}->[$i]; $entry->{file} = $values{FILE} if (defined($values{FILE})); $entry->{desc} = $values{DESC} if (defined($values{DESC})); last; } } } sub remove_file { $logger->trace('call StatusPage::remove_file') if $logger; my $self = shift; my ($key) = @_; return unless $self->{file_keys}->{$key}; for (my $i = 0; $i < scalar(@{$self->{files}}); $i++) { if ($self->{files}->[$i]->{key} eq $key) { splice(@{$self->{files}}, $i, 1); last; } } delete $self->{file_keys}->{$key}; } sub add_message { $logger->trace('call StatusPage::add_message') if $logger; my $self = shift; my ($message) = @_; push(@{$self->{messages}}, {msg => $message}); } sub update { $logger->trace('call StatusPage::update') if $logger; my $self = shift; my ($status) = @_; $status = '' unless defined $status; my @found_files = (); foreach my $entry (@{$self->{files}}) { my $file = $entry->{'file'}; my $desc = $entry->{'desc'}; if (defined($file) && -e $file && -s $file) { push(@found_files, {file => $file, desc => $desc}); } } my $fh; sysopen($fh, $self->{page}, O_CREAT | O_WRONLY | O_TRUNC) or _log_and_die("Failed to open \"" . $self->{page} . "\"."); my $template = HTML::Template->new(filename => '/mit/meme_v4.11.4/etc/job_status.tmpl'); $template->param( program => $self->{program}, files => \@found_files, msgs => $self->{messages}, status => $status ); print $fh $template->output; close($fh) or _log_and_die("Failed to close \"" . $self->{page} . "\"."); } sub remaining_time { $logger->trace('call StatusPage::remaining_time') if $logger; my $self = shift; return $Globals::MAXTIME - int(&Time::HiRes::tv_interval($self->{when}, [&Time::HiRes::gettimeofday()]) + 0.5); } sub load_alphabet { $logger->trace('call StatusPage::load_alphabet') if $logger; my $self = shift; my ($type, $file) = @_; my $alphabet; if (defined($file)) { eval { $alphabet = new Alphabet($file); }; if ($@) { my $message = "Failed to load alphabet definition from \"$file\".\n" . $@; $self->add_message($message); print STDERR $message; $self->update("Error"); $self->write_log(); exit(1); } } else { $alphabet = ($type eq 'RNA' ? rna() : ($type eq 'DNA' ? dna() : protein())); } return $alphabet; } sub run { $logger->trace('call StatusPage::run') if $logger; my $self = shift; my (%invk_opts) = @_; $self->add_file('tidings', $self->{output}, 'Warning Messages') unless $self->{file_keys}->{tidings}; my $prog = $invk_opts{PROG}; my @args = @{$invk_opts{ARGS}}; unless (defined($invk_opts{ALL_FILE}) || defined($invk_opts{ALL_VAR}) || ((defined($invk_opts{OUT_FILE}) || defined($invk_opts{OUT_VAR})) && (defined($invk_opts{ERR_FILE}) || defined($invk_opts{ERR_VAR})))) { # we can redirect something! if (defined($invk_opts{OUT_FILE}) || defined($invk_opts{OUT_VAR})) { # redirect ERR output $invk_opts{ERR_FILE} = $self->{output}; $invk_opts{ERR_TRUNCATE} = 0; } elsif (defined($invk_opts{ERR_FILE}) || defined($invk_opts{ERR_VAR})) { # redirect OUT output $invk_opts{OUT_FILE} = $self->{output}; $invk_opts{OUT_TRUNCATE} = 0; } else { # we can redirect everything! $invk_opts{ALL_FILE} = $self->{output}; $invk_opts{TRUNCATE} = 0; } } $self->add_message('Starting '.$prog.'
' . stringify_args($prog, @args) . ''); $self->update("Starting"); my ($time, $oot, $status_code); $oot = 0; # FALSE unless (defined($invk_opts{TIMEOUT})) { $invk_opts{TIMEOUT} = $self->remaining_time(); } $invk_opts{TIME} = \$time; $invk_opts{OOT} = \$oot; # run the program $status_code = invoke(%invk_opts); my $status_msg; if ($oot) { $status_msg = "Ran out of time! Stopping $prog."; $self->add_message($status_msg); print STDERR $status_msg, "\n"; } my $err = ($status_code != 0 || $oot); if ($err) { if ($status_code == -1) { $status_msg = $prog . " failed to run"; } elsif ($status_code & 127) { $status_msg = $prog . " process died with signal " . ($status_code & 127) . ", " . (($status_code & 128) ? 'with' : 'without') . " coredump"; } else { $status_msg = $prog . " exited with error code " . ($status_code >> 8); } print STDERR $status_msg, "\n"; $self->update_file('tidings', DESC => 'Error Messages'); } else { $status_msg = $prog . ' ran successfully in ' . (int($time * 100 + 0.5) / 100) . ' seconds'; } $self->add_message($status_msg); $self->update($err ? "Error" : ""); if ($err) { $self->write_log(); exit(1); } } sub write_log { $logger->trace('call StatusPage::write_log') if $logger; my $self = shift; # the host my $host = hostname; # the current directory without path my $jobid = (splitdir(getcwd()))[-1]; # the unique user identifier (aka universally unique identifier) my $uuid = 'no_uuid_specified'; if (-e 'uuid') { $uuid = `cat uuid`; unlink 'uuid'; } # the command line arguments my $args = stringify_args(@{$self->{argv}}); # convert timestamp into start time and end time my $start_time = &_format_log_date($self->{when}->[0]); # the submission time if it is avaliable but use the start time as a default my $submit_time = $start_time; if (-e 'submit_time_file') { $submit_time = `cat submit_time_file`; unlink 'submit_time_file'; } # the end time (now) my $end_time = &_format_log_date(&Time::HiRes::gettimeofday()); # create the path to the log file my $logfile = catfile('/mit/meme_v4.11.4/LOGS', $self->{log}); # open the log file for appending my $logfh; sysopen($logfh, $logfile, O_CREAT | O_APPEND | O_WRONLY) or &_log_and_die("Unable to open invocation log for appending ($logfile)."); # write out the invocation log print $logfh "$host $jobid submit: $submit_time start: $start_time end: $end_time $args $uuid\n"; # close the log file close($logfh); # cleanup $self->{cleanup}->(); } sub _opt_desc { my ($opt) = @_; # this trick should apparently differentiate between "0" and 0 if (length( do { no warnings "numeric"; $opt & "" } )) { my $num = $opt + 1; if ($num == 1) { return '1st non-option'; } elsif ($num == 2) { return '2nd non-option'; } elsif ($num == 3) { return '3rd non-option'; } else { return $num . 'th non-option'; } } else { return 'option ' . $opt; } } sub _format_log_date { $logger->trace('call _format_log_date') if $logger; my ($seconds_since_epoch) = @_; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime($seconds_since_epoch); return sprintf('%02d/%02d/%02d %02d:%02d:%02d', $mday, $mon + 1, $year % 100, $hour, $min, $sec); } sub _log_and_die { if ($logger) { $Log::Log4perl::caller_depth++; $logger->logdie(@_); } else { die(@_); } }