import commands import sys # Converts an string array to floats def to_float(arr): for i in range(len(arr)): arr[i] = float(arr[i]) # Converts an string array to ints def to_int(arr): for i in range(len(arr)): arr[i] = int(arr[i]) # Error check if (len(sys.argv) != 4): print "Expected 3 command line argument, but got", len(sys.argv)-1 sys.exit(1) def read_SM(file): AA = ['W','F','Y','L','I','V','M','A','G','P','C','T','S','Q','N','E','D','H','K','R'] f = open(file) lines = f.readlines() f.close() M = dict() for i in range(len(lines)): lines[i] = lines[i].rstrip() nums = lines[i].split(" ") M[i] = dict() for j in range(len(nums)): M[i][AA[j]] = float(nums[j]) return M # Read in input PDB file pdbf = sys.argv[1] seq = sys.argv[2] mfile = sys.argv[3] # Compute necessary properties bsa = commands.getoutput("getbsa " + pdbf).split("\n") # buried surface area (BSA) enp = commands.getoutput("getenvpol " + pdbf).split("\n") # fraction BSA covered by polar atoms ss = commands.getoutput("getss " + pdbf).split("\n") # secondary structure (1 - helix, 2 - sheet, 3 - other) to_float(bsa) to_float(enp) to_int(ss) # Classify all sites prof = list() # to hold 1-D profile of template for ri in range(len(bsa)): if (bsa[ri] <= 40 and ss[ri] == 3): prof.append(17) # E elif (bsa[ri] <= 40 and ss[ri] == 2): prof.append(16) # Eb elif (bsa[ri] <= 40 and ss[ri] == 1): prof.append(15) # Ea elif (bsa[ri] <= 114 and bsa[ri] > 40 and enp[ri] >= 0.67 and ss[ri] == 3): prof.append(14) # P2 elif (bsa[ri] <= 114 and bsa[ri] > 40 and enp[ri] >= 0.67 and ss[ri] == 2): prof.append(13) # P2b elif (bsa[ri] <= 114 and bsa[ri] > 40 and enp[ri] >= 0.67 and ss[ri] == 1): prof.append(12) # P2a elif (bsa[ri] <= 114 and bsa[ri] > 40 and enp[ri] < 0.67 and ss[ri] == 3): prof.append(11) # P1 elif (bsa[ri] <= 114 and bsa[ri] > 40 and enp[ri] < 0.67 and ss[ri] == 2): prof.append(10) # P1b elif (bsa[ri] <= 114 and bsa[ri] > 40 and enp[ri] < 0.67 and ss[ri] == 1): prof.append(9) # P1a elif (bsa[ri] > 114 and enp[ri] >= 0.58 and ss[ri] == 3): prof.append(8) # B3 elif (bsa[ri] > 114 and enp[ri] >= 0.58 and ss[ri] == 2): prof.append(7) # B3b elif (bsa[ri] > 114 and enp[ri] >= 0.58 and ss[ri] == 1): prof.append(6) # B3a elif (bsa[ri] > 114 and enp[ri] >= 0.45 and enp[ri] < 0.58 and ss[ri] == 3): prof.append(5) # B2 elif (bsa[ri] > 114 and enp[ri] >= 0.45 and enp[ri] < 0.58 and ss[ri] == 2): prof.append(4) # B2b elif (bsa[ri] > 114 and enp[ri] >= 0.45 and enp[ri] < 0.58 and ss[ri] == 1): prof.append(3) # B2a elif (bsa[ri] > 114 and enp[ri] < 0.45 and ss[ri] == 3): prof.append(2) # B1 elif (bsa[ri] > 114 and enp[ri] < 0.45 and ss[ri] == 2): prof.append(1) # B1b elif (bsa[ri] > 114 and enp[ri] < 0.45 and ss[ri] == 1): prof.append(0) # B1a else: print "residue with index %d could not be cassified (bsa = %.2f, f = %.2f, ss = %d)" % (ri, bsa[ri], enp[ri], ss[ri]) sys.exit(-1) # Read Eisenberg et. al matrix from file M = read_SM(mfile) def DPAlign(seqA, seqB, gp, M): y = len(seqA) + 1 x = len(seqB) + 1 S = list() # table of scores D = list() # table of directions for i in range(y): S.append(list()); D.append(list()) for j in range(x): S[i].append(gp*(i == 0 or j == 0)*max(i, j)) D[i].append(0) # start filling out the dynamic programming matrix for i in range(1, y): for j in range(1, x): # we can arrive at [i,j] from either north or [i-1,j], west [i,j-1], or north-west [i-1,j-1] n = S[i-1][j] + gp w = S[i][j-1] + gp nw = S[i-1][j-1] + M[seqA[i-1]][seqB[j-1]] # choose best if (n >= max(w, nw)): D[i][j] = 1; # 1 will designate coming from north elif (w >= max(n, nw)): D[i][j] = 2; # 1 will designate coming from west elif (nw >= max(n, w)): D[i][j] = 3; # 1 will designate coming from west S[i][j] = max(n, w, nw); # do the trace-back starting at the last position and build the alignment seqAal = list() seqBal = list() i = y - 2; j = x - 2 while (i >= 0 or j >= 0): # if we came to [i,j] from north, put gap into second sequence if (D[i][j] == 1): seqAal.append(seqA[i]) seqBal.append("-") i -= 1 # if we came to [i,j] from west, put gap into first sequence elif (D[i][j] == 2): seqAal.append("-") seqBal.append(seqB[j]) j -= 1 # if we came to [i,j] from north-west, allign the two characters else: seqAal.append(seqA[i]) seqBal.append(seqB[j]) i -= 1; j -= 1 # since the alignment was build from the end, reverse seqAal.reverse() seqBal.reverse() return S[y-1][x-1], seqAal, seqBal # Align sequnece and environment profile using DP score, profa, seqa = DPAlign(prof, seq, -2, M) # Output alignment for si in seqa: print "%2s" % (si), print for pi in profa: print "%2s" % (pi), print print "score = %.2f" % (score)