#!/usr/bin/perl
=head1 NAME
dreme_xml_to_html - Make a DREME HTML output from a DREME XML output.
=head1 SYNOPSIS
dreme_xml_to_html
=cut
use strict;
use warnings;
use Cwd qw(abs_path);
use Fcntl qw(O_CREAT O_RDONLY O_WRONLY O_TRUNC SEEK_SET);
use File::Basename qw(fileparse);
use File::Spec::Functions qw(tmpdir);
use File::Temp qw(tempfile);
use Getopt::Long;
use Pod::Usage;
use XML::Parser::Expat;
use lib '/mit/meme_v4.11.4/lib/perl';
my $etc_dir;
my $temp_dir;
my $scripts_dir;
#
# initialise the global constants
#
sub initialise {
# setup etc dir
$etc_dir = defined($ENV{MEME_ETC_DIR}) ? $ENV{MEME_ETC_DIR} : '/mit/meme_v4.11.4/etc';
# setup temporary directory
$temp_dir = '';
# use the perl default if none is supplied or the replace fails
$temp_dir = tmpdir() if ($temp_dir eq '' || $temp_dir =~ m/^\@TMP[_]DIR\@$/);
# find the location of the script
my $script_name;
($script_name, $scripts_dir) = fileparse(__FILE__);
$scripts_dir = abs_path($scripts_dir);
# add script location to search path
unshift(@INC, $scripts_dir);
require Alphabet;
require DremeSAX;
}
sub arguments {
# Set Option Defaults
my $options = {XML_PATH => undef, TXT_PATH => undef};
# General Options
my $help = 0; # FALSE
my @errors = ();
my @dbs = ();
# get the options from the arguments
my $options_success = 0; # FALSE
# redirect stderr to a temp file so we can get the error message from GetOptions
my $olderr;
my $tmperr = tempfile('GetOptions_XXXXXXXXXX', DIR => $temp_dir, UNLINK => 1);
open($olderr, ">&STDERR") or die("Can't dup STDERR: $!");
open(STDERR, '>&', $tmperr) or die("Can't redirect STDERR to temp file: $!");
# parse options
$options_success = GetOptions(
'help|?' => \$help,
);
($options->{XML_PATH}, $options->{TXT_PATH}) = @ARGV;
# display help
pod2usage(1) if $help;
# reset STDERR
open(STDERR, ">&", $olderr) or die("Can't reset STDERR: $!");
# read argument parsing errors
seek($tmperr, 0, SEEK_SET);
while (<$tmperr>) {chomp; push(@errors, $_);}
close($tmperr);
# check source XML file
unless (defined($options->{XML_PATH})) {
push(@errors, "No DREME XML file specified");
} elsif (not -e $options->{XML_PATH}) {
push(@errors, "The DREME XML file specified does not exist");
}
unless (defined($options->{TXT_PATH})) {
push(@errors, "No output file specified");
}
# print errors
foreach my $error (@errors) {
print STDERR $error, "\n";
}
pod2usage(2) if @errors;
# return options
return $options;
}
sub start_dreme {
my ($info, $vmajor, $vminor, $vpatch, $release_date) = @_;
$info->{version} = "$vmajor.$vminor.$vpatch";
my $fh = $info->{fh};
print $fh "# DREME ", $info->{version}, "\n";
}
sub end_dreme {
my ($info) = @_;
}
sub start_model {
my ($info) = @_;
$info->{model} = {};
}
sub end_model {
my ($info) = @_;
}
sub handle_command_line {
my ($info, $command_line) = @_;
my $fh = $info->{fh};
print $fh "# command: $command_line\n";
}
sub handle_positives {
my ($info, $name, $count, $file, $last_mod_date) = @_;
my $fh = $info->{fh};
print $fh "# positives: $count from $file ($last_mod_date)\n";
}
sub handle_negatives {
my ($info, $name, $count, $from, $file, $last_mod_date) = @_;
my $fh = $info->{fh};
print $fh "# negatives: $count from ";
if ($from eq 'shuffled') {
print $fh "shuffled positives\n";
} else {
print $fh "$file ($last_mod_date)\n";
}
}
sub handle_alphabet {
my ($info, $alphabet) = @_;
$info->{alph} = $alphabet;
}
sub handle_strands {
my ($info, $strands) = @_;
$info->{model}->{strands} = $strands;
}
sub handle_background {
my ($info, @probs) = @_;
$info->{model}->{background} = [@probs];
}
sub handle_stop {
my ($info, $evalue, $count, $time) = @_;
}
sub handle_ngen {
my ($info, $ngen) = @_;
}
sub handle_add_pv_thresh {
my ($info, $pv_thresh) = @_;
}
sub handle_seed {
my ($info, $seed) = @_;
}
sub handle_host {
my ($info, $host) = @_;
my $fh = $info->{fh};
print $fh "# host: $host\n";
}
sub handle_when {
my ($info, $when) = @_;
my $fh = $info->{fh};
print $fh "# when: $when\n";
}
sub handle_description {
my ($info, $description) = @_;
my $fh = $info->{fh};
print $fh "# description: ", join("\n# : ", split(/\n/, $description)), "\n";
}
sub start_motifs {
my ($info) = @_;
my $fh = $info->{fh};
print $fh "\nMEME version ", $info->{version}, "\n\n";
my $alph = $info->{alph};
print $fh $alph->to_text();
if ($alph->has_complement()) {
if ($info->{model}->{strands} eq 'both') {
print $fh "\nstrands: + -\n";
} else {
print $fh "\nstrands: +\n";
}
}
print $fh "\nBackground letter frequencies (from dataset):\n";
my $i;
for ($i = 0; $i < $alph->size_core(); $i++) {
print $fh ' ' unless $i == 0;
print $fh $alph->char($i), ' ', $info->{model}->{background}->[$i];
}
print $fh "\n";
}
sub end_motifs {
my ($info) = @_;
}
sub start_motif {
my ($info, $id, $alt, $seq, $len, $nsites, $p, $n, $pvalue, $evalue, $unerased_evalue) = @_;
my $fh = $info->{fh};
print $fh "\n\nMOTIF $seq $alt\n\n";
my $alen = $info->{alph}->size_core();
$info->{pssm} = "letter-probability matrix: alength= $alen w= $len nsites= $nsites E= $evalue\n";
if ($info->{alph}->has_complement()) {
$info->{words} = "# Word RC Word Pos Neg P-value E-value\n";
$info->{words} .= sprintf("# BEST %10s %10s %10s %10s %10s %10s\n", $seq, $info->{alph}->rc_seq($seq), $p, $n, $pvalue, $evalue);
} else {
$info->{words} = "# Word Pos Neg P-value E-value\n";
$info->{words} .= sprintf("# BEST %10s %10s %10s %10s %10s\n", $seq, $p, $n, $pvalue, $evalue);
}
}
sub end_motif {
my ($info) = @_;
my $fh = $info->{fh};
print $fh $info->{words}, "\n";
print $fh $info->{pssm};
}
sub handle_pos {
my ($info, @probs) = @_;
$info->{pssm} .= join(" ", @probs) . "\n";
}
sub handle_match {
my ($info, $seq, $p, $n, $pvalue, $evalue) = @_;
if ($info->{alph}->has_complement()) {
$info->{words} .= sprintf("# %10s %10s %10s %10s %10s %10s\n", $seq, $info->{alph}->rc_seq($seq), $p, $n, $pvalue, $evalue);
} else {
$info->{words} .= sprintf("# %10s %10s %10s %10s %10s\n", $seq, $p, $n, $pvalue, $evalue);
}
}
sub handle_run_time {
my ($info, $cpu, $real, $stop) = @_;
my $fh = $info->{fh};
print $fh "\n\n# Stopping reason: ";
if ($stop eq 'evalue') {
print $fh "E-value threshold exceeded\n";
} elsif ($stop eq 'count') {
print $fh "target motif count reached\n";
} elsif ($stop eq 'time') {
print $fh "maximum running time reached\n";
}
print $fh "# Running time: $real seconds\n";
}
sub transform_data {
my ($opts, $out_fh) = @_;
my $info = {fh => $out_fh};
my $sax = new DremeSAX($info,
start_dreme => \&start_dreme,
end_dreme=> \&end_dreme,
start_model => \&start_model,
end_model => \&end_model,
handle_command_line => \&handle_command_line,
handle_positives => \&handle_positives,
handle_negatives => \&handle_negatives,
handle_alphabet => \&handle_alphabet,
handle_strands => \&handle_strands,
handle_background => \&handle_background,
handle_stop => \&handle_stop,
handle_ngen => \&handle_ngen,
handle_add_pv_thresh => \&handle_add_pv_thresh,
handle_seed => \&handle_seed,
handle_host => \&handle_host,
handle_when => \&handle_when,
handle_description => \&handle_description,
start_motifs => \&start_motifs,
end_motifs => \&end_motifs,
start_motif => \&start_motif,
end_motif => \&end_motif,
handle_pos => \&handle_pos,
handle_match => \&handle_match,
handle_run_time => \&handle_run_time
);
my $fh;
sysopen($fh, $opts->{XML_PATH}, O_RDONLY) or die("Failed to open file \"$opts->{XML_PATH}\"\n");
while (<$fh>) {
$sax->parse_more($_);
if ($sax->has_errors()) {
die("Failed to write HTML output due to errors processing the XML:\n" . join("\n", $sax->get_errors()));
}
}
$sax->parse_done();
if ($sax->has_errors()) {
die("Failed to write HTML output due to errors processing the XML:\n" . join("\n", $sax->get_errors()));
}
}
sub main {
&initialise();
my $opts = &arguments();
# open the output file
my $fh;
sysopen($fh, $opts->{TXT_PATH}, O_WRONLY | O_CREAT | O_TRUNC) or die("Failed to open file \"$opts->{TXT_PATH}\" for writing\n");
# transform the XML into text
&transform_data($opts, $fh);
}
&main();
1;