#!/usr/bin/perl -w
# AUTHOR: James Johnson
# CREATE DATE: 18/9/2014
# DESCRIPTION: Convert regular expression motifs in ELM format into MEME
#              motifs. Each input file is a tab separated table whose fields
#              are wrapped in double quotes; every usable row is turned into
#              one motif written to standard output.

use warnings;
use strict;

use lib qw(/mit/meme_v4.11.4/lib/perl);

use Alphabet qw(protein);
use MotifUtils qw(seq_to_intern intern_to_meme read_background_file);

use Fcntl qw(O_RDONLY);
use Getopt::Long;
use Pod::Usage;

=head1 NAME

elm2meme - Converts ELM motifs into MEME motifs.

=head1 SYNOPSIS

elm2meme [options] <elm file>+

 Options:
  -bg <bg file>                 file with background frequencies of letters;
                                default: uniform background
  -pseudo <total pseudocounts>  add <total pseudocounts> times letter
                                background to each frequency; default: 0
  -logodds                      print log-odds matrix, too;
                                default: print frequency matrix only
  -url <website>                website for the motif; The motif name is
                                substituted for MOTIF_NAME;
  -h                            print usage message

 Converts ELM motifs into MEME motifs

 Writes to standard output.

=cut

# Set option defaults
my $help          = 0;
my $bg_file;
my $pseudo_total  = 0;
my $print_logodds = 0;
my $url_pattern   = "";
my @files         = ();

GetOptions(
    "bg=s"     => \$bg_file,
    "pseudo=f" => \$pseudo_total,
    "logodds"  => \$print_logodds,
    "url=s"    => \$url_pattern,
    "help|?"   => \$help
) or pod2usage(2);

# output help if requested
pod2usage(1) if $help;

# everything left on the command line after option parsing is an input file
@files = @ARGV;
pod2usage("Missing ELM files") unless @files;

# get the background model
my %bg = read_background_file(protein(), $bg_file);

my $num_skipped = 0;    # motifs this script can not represent
my $num_failed  = 0;    # malformed rows and rows seq_to_intern rejected
my $num_motifs  = 0;    # motifs written to standard output

foreach my $file (@files) {
    # An unreadable input must not be mistaken for an empty one, so stop
    # with the reason instead of silently producing partial output.
    sysopen(my $fh, $file, O_RDONLY)
        or die "Failed to open ELM file \"$file\": $!\n";

    while (my $line = <$fh>) {
        next if $line =~ m/^#/;                # skip comments
        next if $line =~ m/^\"Accession\"/;    # skip field names

        # Strip the line ending (LF or CRLF) so that the closing quote of
        # the final field is really the last character of the line.
        $line =~ s/\r?\n\z//;
        next unless $line =~ m/\S/;            # skip blank lines

        # Fields are separated by "<tab>"; this leaves the opening quote on
        # the first field and the closing quote on the last, removed below.
        my @parts = split /"\t"/, $line;
        if (@parts) {
            $parts[0] =~ s/^"//;
            $parts[scalar(@parts) - 1] =~ s/"$//;
        }

        # The number of instances is the 7th field; a shorter row can not
        # supply the values needed below, so report it and count it as failed.
        if (@parts < 7) {
            $num_failed++;
            printf STDERR "Malformed line %d in %s: expected at least 7 fields but found %d\n",
                $., $file, scalar(@parts);
            next;
        }

        # Column order assumed by this assignment:
        # Accession ELMIdentifier FunctionalSiteName Description Regex
        # Probability #Instances #Instances_in_PDB
        my ($acc, $elmid, $func_site_name, $desc, $regex, $prob, $nsites,
            $inst_in_pdb) = @parts;

        # can not handle motifs that must be anchored or are of variable length
        if ($regex =~ m/(^|[^\[])\^/ || $regex =~ m/[\$\{\}\?\|\*\+]/) {
            $num_skipped++;
            printf STDERR "Skipping %15s %25s %s\n", $acc, $elmid, $regex;
            next;
        }

        # remove unneeded brackets -
        # as we've already checked for length quantifiers, ORs and anchors these
        # can't really be doing anything
        $regex =~ s/[\(\)]//g;

        # a motif is always given at least one site
        $nsites = 1 if $nsites <= 0;

        my $url = $url_pattern;
        $url =~ s/MOTIF_NAME/$acc/g;

        my ($motif, $errors) = seq_to_intern(\%bg, $regex, $nsites,
            $pseudo_total, url => $url, id => $acc, alt => $elmid,
            also => '.');

        print STDERR join("\n", @{$errors}), "\n" if @{$errors};

        if ($motif) {
            # The last argument is true only for the first converted motif
            # (presumably it requests the MEME file header - confirm against
            # MotifUtils); the counter is incremented as a side effect.
            print intern_to_meme($motif, $print_logodds, 1, !($num_motifs++));
        }
        else {
            $num_failed++;
        }
    }

    close($fh);
}

print STDERR "Skipped: $num_skipped, Failed: $num_failed, Converted: $num_motifs\n";