package ReadFastaFile; use strict; use warnings; require Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw(read_fasta_sequence read_fasta_file); # GLOBAL state my $__fasta_next_seq_name__; my $__fasta_next_comment__; my $__fasta_eof__; ################################################################################ # read_fasta_sequence(fh) # # File handle passed by reference. # Returns (name, seq, comment) where comment is anything after the name # on the same line; can call ignore the comment value on return. To start # a new file, on the first call add a parameter of any true value at end ################################################################################ sub read_fasta_sequence { my ($fh, $newfile) = @_; my ($name, $seq, $comment); if ($newfile) { $__fasta_next_seq_name__ = $__fasta_next_comment__ = undef; $__fasta_eof__ = 0; } else { $__fasta_eof__ = 0 unless defined $__fasta_eof__; if ($__fasta_eof__ == 1) { return(()); } # EOF reached already $name = $__fasta_next_seq_name__; # use saved name from last call $comment = $__fasta_next_comment__; # and rest of name line } $seq = ""; while (<$fh>) { if (/^>(\S+)\s+(.*$)/) { # found start of next sequence $__fasta_next_seq_name__ = $1; # save next sequence name $__fasta_next_comment__ = $2; # save next sequence name if (defined $name) { return($name, $seq, $comment); # return the sequence just read } else { $name = $__fasta_next_seq_name__; # first sequence $comment = $__fasta_next_comment__; # and rest of name line } } else { s/\s//g; # remove space from sequence s/\.//g; # remove "."s from sequence next if ($_ eq ""); $seq .= $_; } } $__fasta_eof__ = 1; return($name, $seq, $comment); # last sequence } # read_fasta_sequence ################################################################################ # read_fasta_file # # Read a FASTA sequence file and return the sequences in an # associative array indexed by sequence name. # NB. removes '.' characters from sequence # # $file open file pointer # ################################################################################ sub read_fasta_file { my ($file) = @_; my ($name, $seq); my %seqs = (); open(F, "<$file") || die("Can't open file $file\n"); # read the file $seq = ""; while () { if (/^>(\S+)\s/) {# found start of sequence if (defined $name) {# save previous sequence in array $seqs{$name} = $seq; } $name = $1;# save sequence name $seq = ""; } else { s/\s//g;# remove space from sequence s/\.//g;# remove "."s from sequence next if ($_ eq ""); $seq .= $_; } } # read file if ($name && $seq) {# save very last sequence in array $seqs{$name} = $seq; } return %seqs;# return array of sequences } # read_fasta_file 1;