#!/usr/bin/python # Read GLAM2 output: write an HTML version of it import fileinput, os, tempfile, sys from string import replace embed_seqs = 0 version = '' commandline = '' alphabet = '?' alignments = [] text_alignments = [] freq_matrices = [] state = 0 usage = """ USAGE: %s \tConvert glam2 output to HTML. \tReads standard input. \tWrites standard output. """ % (sys.argv[0]) # parse command line i=1 while i < len(sys.argv): arg = sys.argv[i] print >> sys.stderr, "Unknown command line argument: " + arg; sys.exit(1) i += 1 for line in sys.stdin.readlines(): fields = line.split() if state == 0: if line.startswith('Version'): version = line elif line.find('glam2') != -1: commandline = line elif line.startswith('Residue counts'): if len(fields) == 4 + 3: alphabet = 'n' alen = 4 elif len(fields) == 20 + 3: alphabet = 'p' alen = 20 state += 1 elif state == 1: if len(fields) > 1 and fields[0] == 'Score:': score = fields[1] columns = int(fields[3]) sequences = int(fields[5]) state += 1 elif state == 2: if len(fields) == 1: keypos = fields[0] aln = [] alignments.append([score, keypos, aln]) text_aln = line text_alignments.append(text_aln) state += 1 elif state == 3: if len(fields) == 6: assert len(fields[2]) == len(keypos) aln.append(fields) text_alignments[len(text_alignments)-1] += line else: state += 1 elif state == 4: if len(fields) == alen + 3 and fields[alen] == 'Del': ipos = 0 pspm = [] freq_matrices.append([pspm, columns, sequences]) state += 1 elif state == 5: if len(fields) == alen + 2: pspm.append(fields[:alen]) ipos += 1 if ipos == columns: state = 1 assert len(alignments) > 0 # print the HTML header: print '' print '
' print '', version, '
', commandline, '
If you use this program in your research, please cite: \ MC Frith, NFW Saunders, B Kobe, TL Bailey, "Discovering sequence motifs with arbitrary insertions and deletions", PLoS Computational Biology, 4(5):e1000071, 2008.\ \ [full text]
' print '' print ''