#!/usr/bin/perl =head1 NAME mast_xml_to_html - Make a MAST HTML output from a MAST XML output. =head1 SYNOPSIS mast_xml_to_html =cut use strict; use warnings; use Cwd qw(abs_path); use Fcntl qw(O_RDONLY SEEK_SET); use File::Basename qw(fileparse); use File::Spec::Functions qw(tmpdir); use File::Temp qw(tempfile); use Getopt::Long; use Pod::Usage; use XML::Parser::Expat; use lib '/mit/meme_v4.11.4/lib/perl'; my $etc_dir; my $temp_dir; my $scripts_dir; # # initialise the global constants # sub initialise { # setup etc dir $etc_dir = defined($ENV{MEME_ETC_DIR}) ? $ENV{MEME_ETC_DIR} : '/mit/meme_v4.11.4/etc'; # setup temporary directory $temp_dir = ''; # use the perl default if none is supplied or the replace fails $temp_dir = tmpdir() if ($temp_dir eq '' || $temp_dir =~ m/^\@TMP[_]DIR\@$/); # find the location of the script my $script_name; ($script_name, $scripts_dir) = fileparse(__FILE__); $scripts_dir = abs_path($scripts_dir); # add script location to search path unshift(@INC, $scripts_dir); require HtmlMonolithWr; require MastSAX; } sub arguments { # Set Option Defaults my $options = {XML_PATH => undef, HTML_PATH => undef}; # General Options my $help = 0; # FALSE my @errors = (); my @dbs = (); # get the options from the arguments my $options_success = 0; # FALSE # redirect stderr to a temp file so we can get the error message from GetOptions my $olderr; my $tmperr = tempfile('GetOptions_XXXXXXXXXX', DIR => $temp_dir, UNLINK => 1); open($olderr, ">&STDERR") or die("Can't dup STDERR: $!"); open(STDERR, '>&', $tmperr) or die("Can't redirect STDERR to temp file: $!"); # parse options $options_success = GetOptions( 'help|?' => \$help, ); ($options->{XML_PATH}, $options->{HTML_PATH}) = @ARGV; # display help pod2usage(1) if $help; # reset STDERR open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!"); # read argument parsing errors seek($tmperr, 0, SEEK_SET); while (<$tmperr>) {chomp; push(@errors, $_);} close($tmperr); # check source XML file unless (defined($options->{XML_PATH})) { push(@errors, "No MAST XML file specified"); } elsif (not -e $options->{XML_PATH}) { push(@errors, "The MAST XML file specified does not exist"); } unless (defined($options->{HTML_PATH})) { push(@errors, "No output file specified"); } # print errors foreach my $error (@errors) { print STDERR $error, "\n"; } pod2usage(2) if @errors; # return options return $options; } sub _start_mast { my ($info, $version, $release) = @_; my $wr = $info->{wr}; $wr->str_prop('program', 'MAST'); $wr->str_prop('version', $version); $wr->str_prop('release', $release); } sub _handle_command_line { my ($info, @args) = @_; my $wr = $info->{wr}; $wr->str_array_prop("cmd", @args); } sub _handle_settings { my ($info, $strand_handling, $max_correlation, $remove_correlated, $max_seq_evalue, $adjust_hit, $max_hit_pvalue, $max_weak_pvalue) = @_; my $wr = $info->{wr}; $wr->property("settings"); $wr->start_object_value(); $wr->str_prop("strand_handling", $strand_handling); $wr->num_prop("max_correlation", $max_correlation); $wr->bool_prop("remove_correlated", $remove_correlated); $wr->num_prop("max_seq_evalue", $max_seq_evalue); $wr->bool_prop("adjust_hit", $adjust_hit); $wr->num_prop("max_hit_pvalue", $max_hit_pvalue); $wr->num_prop("max_weak_pvalue", $max_weak_pvalue); $wr->end_object_value(); } sub _handle_alphabet { my ($info, $alph) = @_; my $wr = $info->{wr}; $wr->property("alphabet"); $alph->to_json($wr); } sub _handle_sequence_alphabet { my ($info, $seq_alph) = @_; my $wr = $info->{wr}; $wr->property("sequence_alphabet"); $seq_alph->to_json($wr); } sub _handle_translate { my ($info, $num_seq, $num_mot) = @_; my $wr = $info->{wr}; $wr->num_prop("xlate", $num_seq); } sub _handle_background { my ($info, $from, @probs) = @_; my $wr = $info->{wr}; $wr->property("background"); $wr->start_object_value(); $wr->str_prop("source", $from); $wr->num_array_prop("freqs", @probs); $wr->end_object_value(); } sub _start_motif_dbs { my ($info) = @_; my $wr = $info->{wr}; $wr->property("motif_dbs"); $wr->start_array_value(); } sub _handle_motif_db { my ($info, $source, $name, $last_mod_date, $bg) = @_; my $wr = $info->{wr}; $wr->start_object_value(); $wr->str_prop("source", $source); $wr->str_prop("name", $name) if defined $name; $wr->str_prop("last_mod_date", $last_mod_date); $wr->num_array_prop("bg", @{$bg}); $wr->end_object_value(); } sub _end_motif_dbs { my ($info) = @_; my $wr = $info->{wr}; $wr->end_array_value(); } sub _start_motifs { my ($info) = @_; my $wr = $info->{wr}; $wr->property("motifs"); $wr->start_array_value(); } sub _handle_motif { my ($info, $db, $id, $alt, $len, $nsites, $evalue, $bad, $url, $psm) = @_; $info->{nmotifs} += 1; my $wr = $info->{wr}; $wr->start_object_value(); $wr->num_prop("db", $db); $wr->str_prop("id", $id); $wr->str_prop("alt", $alt) if (defined($alt)); $wr->num_prop("len", $len); $wr->num_prop("nsites", $nsites) if (defined($nsites)); $wr->str_prop("evalue", $evalue) if (defined($evalue)); $wr->bool_prop("bad", $bad) if (defined($bad)); $wr->str_prop("url", $url) if $url; $wr->property("psm"); $wr->start_array_value(); for (my $i = 0; $i < scalar(@{$psm}); $i++) { $wr->num_array_value(@{$psm->[$i]}); } $wr->end_array_value(); $wr->end_object_value(); } sub _end_motifs { my ($info) = @_; my $wr = $info->{wr}; $wr->end_array_value(); } sub _start_correlations { my ($info) = @_; } sub _handle_correlation { my ($info, $idx_a, $idx_b, $value) = @_; my $set = $info->{correlations}; $set->{$idx_a} = {} unless defined $set->{$idx_a}; $set->{$idx_a}->{$idx_b} = $value; } sub _end_correlations { my ($info) = @_; my $wr = $info->{wr}; my $nmotifs = $info->{nmotifs}; my $set = $info->{correlations}; $wr->property("correlation"); $wr->start_array_value(); for (my $i = 0; $i < $nmotifs; $i++) { $wr->start_array_value(); for (my $j = 0; $j < $nmotifs; $j++) { if ($i == $j) { $wr->null_value(); } else { my $value = ($j < $i ? $set->{$j}->{$i} : $set->{$i}->{$j}); print "i: $i, j: $j\n" unless defined $value; $wr->num_value($value); } } $wr->end_array_value(); } $wr->end_array_value(); } sub _start_nos { my ($info) = @_; my $wr = $info->{wr}; $wr->property("nos"); $wr->start_array_value(); } sub _handle_expect { my ($info, $gap, $idx) = @_; my $wr = $info->{wr}; $wr->start_object_value(); $wr->num_prop("gap", $gap) if defined $gap; $wr->num_prop("idx", $idx); $wr->end_object_value(); } sub _end_nos { my ($info) = @_; my $wr = $info->{wr}; $wr->end_array_value(); } sub _start_sequence_dbs { my ($info) = @_; my $wr = $info->{wr}; $wr->property("sequence_dbs"); $wr->start_array_value(); } sub _handle_sequence_db { my ($info, $source, $name, $last_mod_date, $sequence_count, $residue_count, $link) = @_; my $wr = $info->{wr}; $wr->start_object_value(); $wr->str_prop("source", $source); $wr->str_prop("name", $name) if defined $name; $wr->str_prop("last_mod_date", $last_mod_date); $wr->num_prop("sequence_count", $sequence_count); $wr->num_prop("residue_count", $residue_count); $wr->str_prop("link", $link) if defined $link; $wr->end_object_value(); } sub _end_sequence_dbs { my ($info) = @_; my $wr = $info->{wr}; $wr->end_array_value(); } sub _start_sequences { my ($info) = @_; my $wr = $info->{wr}; $wr->property("sequences"); $wr->start_array_value(); } sub _start_sequence { my ($info, $db, $name, $comment, $length) = @_; $info->{scores} = []; # False $info->{first_seg} = 1; # True my $wr = $info->{wr}; $wr->start_object_value(); $wr->num_prop("db", $db); $wr->str_prop("name", $name); $wr->str_prop("comment", $comment) if defined $comment; $wr->num_prop("length", $length); } sub _handle_score { my ($info, $strand, $combined_pvalue, $evalue, $frame) = @_; my $pos = ($strand eq 'both' || $strand eq 'forward' ? 0 : 1); while (scalar(@{$info->{scores}}) < $pos) { push(@{$info->{scores}}, undef); } $info->{scores}->[$pos] = { combined_pvalue => $combined_pvalue, evalue => $evalue, frame => $frame }; } sub _write_scores { my ($info) = @_; my $wr = $info->{wr}; $wr->property("score"); $wr->start_array_value(); for (my $i = 0; $i < scalar(@{$info->{scores}}); $i++) { my $score = $info->{scores}->[$i]; if (defined($score)) { $wr->start_object_value(); $wr->num_prop("combined_pvalue", $score->{combined_pvalue}); $wr->num_prop("evalue", $score->{evalue}); $wr->str_prop("frame", $score->{frame}) if (defined($score->{frame})); $wr->end_object_value(); } else { $wr->null_value(); } } $wr->end_array_value(); } sub _start_seg { my ($info, $start_pos) = @_; my $wr = $info->{wr}; if ($info->{first_seg}) { &_write_scores($info); $wr->property("segs"); $wr->start_array_value(); $info->{first_seg} = 0; # False } $wr->start_object_value(); $wr->num_prop("pos", $start_pos - 1); # change to zero indexed } sub _handle_data { my ($info, $sequence) = @_; my $wr = $info->{wr}; $wr->str_prop("data", $sequence); $wr->property("hits"); $wr->start_array_value(); } sub _handle_hit { my ($info, $pos, $idx, $rc, $pvalue, $match, $translation) = @_; my $wr = $info->{wr}; $wr->start_object_value(); $wr->num_prop("pos", $pos - 1); # change to zero indexed $wr->num_prop("idx", $idx); $wr->bool_prop("rc", $rc); $wr->num_prop("pvalue", $pvalue); $wr->str_prop("match", $match); $wr->str_prop("translation", $translation) if defined $translation; $wr->end_object_value(); } sub _end_seg { my ($info) = @_; my $wr = $info->{wr}; $wr->end_array_value(); $wr->end_object_value(); } sub _end_sequence { my ($info) = @_; my $wr = $info->{wr}; if ($info->{first_seg}) { &_write_scores($info); } else { $wr->end_array_value(); } $wr->end_object_value(); } sub _end_sequences { my ($info) = @_; my $wr = $info->{wr}; $wr->end_array_value(); } sub _handle_runtime { my ($info, $host, $when, $cycles, $seconds) = @_; my $wr = $info->{wr}; $wr->property("runtime"); $wr->start_object_value(); $wr->str_prop("host", $host); $wr->str_prop("when", $when); $wr->num_prop("cycles", $cycles); $wr->num_prop("seconds", $seconds); $wr->end_object_value(); } sub _end_mast { my ($info) = @_; } sub transform_data { my ($opts, $jsonwr) = @_; my $info = {wr => $jsonwr, correlations => {}, nmotifs => 0}; my $sax = new MastSAX($info, start_mast => \&_start_mast, handle_command_line => \&_handle_command_line, handle_settings => \&_handle_settings, handle_alphabet => \&_handle_alphabet, handle_sequence_alphabet => \&_handle_sequence_alphabet, handle_translate => \&_handle_translate, handle_background => \&_handle_background, start_motif_dbs => \&_start_motif_dbs, handle_motif_db => \&_handle_motif_db, end_motif_dbs => \&_end_motif_dbs, start_motifs => \&_start_motifs, handle_motif => \&_handle_motif, end_motifs => \&_end_motifs, start_correlations => \&_start_correlations, handle_correlation => \&_handle_correlation, end_correlations => \&_end_correlations, start_nos => \&_start_nos, handle_expect => \&_handle_expect, end_nos => \&_end_nos, start_sequence_dbs => \&_start_sequence_dbs, handle_sequence_db => \&_handle_sequence_db, end_sequence_dbs => \&_end_sequence_dbs, start_sequences => \&_start_sequences, start_sequence => \&_start_sequence, handle_score => \&_handle_score, start_seg => \&_start_seg, handle_data => \&_handle_data, handle_hit => \&_handle_hit, end_seg => \&_end_seg, end_sequence => \&_end_sequence, end_sequences => \&_end_sequences, handle_runtime => \&_handle_runtime, end_mast => \&_end_mast ); my $fh; sysopen($fh, $opts->{XML_PATH}, O_RDONLY) or die("Failed to open file \"$opts->{XML_PATH}\"\n"); while (<$fh>) { $sax->parse_more($_); if ($sax->has_errors()) { die("Failed to write HTML output due to errors processing the XML:\n" . join("\n", $sax->get_errors())); } } $sax->parse_done(); if ($sax->has_errors()) { die("Failed to write HTML output due to errors processing the XML:\n" . join("\n", $sax->get_errors())); } } sub main { &initialise(); my $opts = &arguments(); # start writing HTML my $htmlwr = new HtmlMonolithWr($etc_dir, 'mast_template.html', $opts->{HTML_PATH}, 'mast_data.js' => 'data'); # transform the XML into JSON &transform_data($opts, $htmlwr->output()); # finish writing HTML $htmlwr->output(); } &main(); 1;