#!/usr/bin/perl -w # # FILE: chen2meme # AUTHOR: Timothy L. Bailey # CREATE DATE: 18/12/2009 # DESCRIPTION: Convert a file containing list of TFBS motif matrices from # CHEN to MEME output format. use warnings; use strict; use lib qw(/mit/meme_v4.11.4/lib/perl); use Alphabet qw(dna); use MotifUtils qw(matrix_to_intern intern_to_meme read_background_file); use Getopt::Long; use Pod::Usage; =head1 NAME chen2meme - Converts a CHEN (concatenated) matrix file into MEME motifs. =head1 SYNOPSIS chen2meme [options] Options: -skip skip this ID (may be repeated) -bg file with background frequencies of letters; default: uniform background -pseudo add times letter background to each frequency; default: 0 -logodds print log-odds matrix, too; default: print frequency matrix only -url website for the motif; The motif name is substituted for MOTIF_NAME; -h print usage message Converts a CHEN (concatenated) matrix file into MEME motifs. Reads standard input. Writes standard output. =cut # constants my $sites = 10; # Set option defaults my %skips; my $bg_file; my $pseudo_total = 0; my $print_logodds = 0; my $url_pattern = ""; my $use_descr_as_name = 0; my $help = 0; # duplicates my %dup_check = (); GetOptions("skip=s" => sub {$skips{$_[1]} = 1}, "bg=s" => \$bg_file, "pseudo=f" => \$pseudo_total, "logodds" => \$print_logodds, "url=s" => \$url_pattern, "id" => \$use_descr_as_name, "h" => \$help) or pod2usage(2); #check pseudo total pod2usage("Option -pseudo must have a positive value.") if ($pseudo_total < 0); #output help if requested pod2usage(1) if $help; # Get the background model. my %bg = &read_background_file(&dna(), $bg_file); # Read the input file. my $num_skipped = 0; my $num_motifs = 0; my (%motifs, $motif_alt, $motif_name, $motif_matrix); while (<>) { chomp; # skip blank lines next if (/^\s*$/); if (/^>(\S+)/) { my $next_name = $1; my @fields = split; my $next_alt = ($#fields > 0) ? $fields[1] : ""; # create previous motif conditional_motif_output(); # motif id line $motif_name = $next_name; $motif_alt = $next_alt; $motif_matrix = ""; } else { $motif_matrix .= $_ . "\n"; } } # create last motif conditional_motif_output(); foreach my $name (sort keys %motifs) { print intern_to_meme($motifs{$name}, $print_logodds, 1, !($num_motifs++)); } print(STDERR "Converted $num_motifs motifs.\n"); print(STDERR "Skipped $num_skipped motifs.\n"); 1; sub conditional_motif_output { if ($motif_matrix) { if (defined($skips{$motif_name})) { $num_skipped++; } else { my $url = $url_pattern; $url =~ s/MOTIF_NAME/$motif_name/g; # check for duplicate motif names if ($dup_check{$motif_name}) { my $seen = $dup_check{$motif_name}; while ($dup_check{$motif_name . "_" . ($seen + 1)}) { $seen++; } $dup_check{$motif_name} = $seen + 1; $motif_name .= "_" . ($seen + 1); } $dup_check{$motif_name} = 1; my ($motif, $errors) = matrix_to_intern(\%bg, $motif_matrix, 'row', $sites, $pseudo_total, id => $motif_name, alt => $motif_alt, url => $url); print(STDERR join("\n", @{$errors}), "\n") if @{$errors}; $motifs{$motif_name} = $motif if $motif; } } }