#!/usr/bin/perl -w # # $Id:$ # # FILE: iupac2meme # AUTHOR: Timothy L. Bailey # CREATE DATE: 3/06/2010 # DESCRIPTION: Convert a DNA IUPAC motif to MEME format use warnings; use strict; use lib qw(/mit/meme_v4.11.4/lib/perl); use MotifUtils qw(seq_to_intern intern_to_meme load_alphabet read_background_file); use Getopt::Long; use Pod::Usage; =head1 NAME iupac2meme - Converts IUPAC motifs into MEME motifs. =head1 SYNOPSIS iupac2meme [options] + Options: -dna use DNA IUPAC alphabet -protein use protein IUPAC alphabet -alph file with alphabet definition; default: use DNA IUPAC alphabet -numseqs assume frequencies based on this many sequences; default: 20 -bg file with background frequencies of letters; default: uniform background -pseudo add times letter background to each frequency; default: 0 -logodds output the log-odds (PSSM) and frequency (PSPM) motifs; default: PSPM motif only -url website for the motif; The motif name is substituted for MOTIF_NAME; default: no url -nosort don't sort the order of motifs -named looks for a motif name after each IUPAC code; default: use the IUPAC code as the motif name Converts IUPAC motifs into MEME motifs. Example IUPAC DNA motif: ACGGWNNYCGT Example IUPAC PROTEN motif: IKLVBZYXXHG Writes standard output. =cut # Set option defaults my $is_dna = 1; my $alph_file = undef; my $num_seqs = 20; my $bg_file; my $pseudo_total = 0; my $print_logodds = 0; my $url_pattern = ""; my $no_sort = 0; my $named = 0; my @motif_strings = (); GetOptions( "dna" => \$is_dna, "protein" => sub {$is_dna = 0}, "alph=s" => \$alph_file, "numseqs=i" => \$num_seqs, "bg=s" => \$bg_file, "pseudo=f" => \$pseudo_total, "logodds" => \$print_logodds, "url=s" => \$url_pattern, "nosort" => \$no_sort, "named" => \$named) or pod2usage(2); @motif_strings = @ARGV; #check num seqs pod2usage("Option -numseqs must have a positive value.") if ($num_seqs < 0); #check pseudo total pod2usage("Option -pseudo must have a positive value.") if ($pseudo_total < 0); #check IUPAC motifs pod2usage("At least one IUPAC motif must be specified.") if (!@motif_strings); # pair up with names my @motif_pairs = (); while (@motif_strings) { my $motif_iupac = shift @motif_strings; my $motif_name = $named ? shift @motif_strings : $motif_iupac; push(@motif_pairs, {iupac => $motif_iupac, name => $motif_name}); } #sort the motifs @motif_pairs = sort {$a->{iupac} cmp $b->{iupac}} @motif_pairs unless $no_sort; # get the background model my %bg = &read_background_file(&load_alphabet($is_dna, $alph_file), $bg_file); # # convert the IUPAC motifs to MEME format # my $num_motifs = 0; foreach my $pair (@motif_pairs) { my $iupac_motif = $pair->{iupac}; my $name = $pair->{name}; my $url = $url_pattern; $url =~ s/MOTIF_NAME/$iupac_motif/g; my ($motif, $errors) = seq_to_intern(\%bg, $iupac_motif, $num_seqs, $pseudo_total, url => $url, id => $name); print STDERR join("\n", @{$errors}), "\n" if @{$errors}; print intern_to_meme($motif, $print_logodds, 1, !($num_motifs++)) if $motif; } # convert motif to MEME format