#!/usr/bin/env python import sys import matplotlib matplotlib.use('Agg') import pylab import re import datetime if len(sys.argv) <= 1: print "Usage: ./makegraph.py ..." print "(where the files are the output of adddates.py" files = sys.argv[1:] data = [] for n in files: f = open(n, 'r') data.append(f.read()) f.close() class CVE(object): def toDate(self, str): # Discard the time portion if str == 'None': return None date = str.split(' ')[0] y,m,d = [int(x) for x in date.split('-')] return datetime.date(y,m,d) def __init__(self, name, assign, commit, public, source): self.name = name self.commit = commit self.source = source if self.commit == '': print >>sys.stderr, "%s is missing a git commit!" % self.name if self.source == '': print >>sys.stderr, "%s is missing a source for its public disclosure date!" % self.name self.assign = self.toDate(assign) self.public = self.toDate(public) def __cmp__(self, other): return cmp(self.name, other.name) # Strip out comments, break into entries cves = [] for d in data: d2 = re.sub(r'\#.*\n', r'', d) d3 = re.sub(r'\n{2,}', r'\n\n', d2) for entry in d3.strip().split('\n\n'): lines = entry.split("\n") name = lines[0].strip() assignstr = lines[1].split(':', 1)[1].strip() commit = lines[2].split(':', 1)[1].strip() publicstr = lines[3].split(':', 1)[1].strip() source = lines[4].split(':', 1)[1].strip() if assignstr == '' or assignstr == 'None': print "Discarding %s... no assigned date" % name continue if publicstr == '' or publicstr == 'None': print "Discarding %s... no public disclosure date" % name continue cves.append(CVE(name, assignstr, commit, publicstr, source)) hist = [] vuln = [] for day in range(0, 365*3): vuln.append(0) cves.sort() before = 0 after = 0 after2wk = 0 after4wk = 0 after8wk = 0 start = datetime.date(2006, 01, 01) for c in cves: diff = c.assign - c.public for day in range(0, 365*3): if (start.toordinal() + day < c.assign.toordinal()) and \ (start.toordinal() + day >= c.public.toordinal()): # print c.public.toordinal(), start.toordinal() + day, c.assign.toordinal() vuln[day] = vuln[day] + 1 # print c.assign # print "%s: Public announcement predates CVE by %s days" % (c.name,diff.days) # if diff.days >= 350: # hist.append(50) if diff.days >= 14: hist.append(diff.days / 7) if diff.days >= 0: after += 1 else: before += 1 if diff.days >= 14: after2wk += 1 if diff.days >= 28: after4wk += 1 if diff.days >= 56: after8wk += 1 #print vuln print print before+after, "CVEs processed" print after, "had their CVEs assigned after public disclosure" print "after: %0.2f%%" % (float(after)/float(before+after) * 100) print "2 weeks after: %0.2f%% (%i)" % (float(after2wk)/float(before+after) * 100, after2wk) print "4 weeks after: %0.2f%% (%i)" % (float(after4wk)/float(before+after) * 100, after4wk) print "8 weeks after: %0.2f%% (%i)" % (float(after8wk)/float(before+after) * 100, after8wk) print LABELFONTSIZE=20 AXISFONTSIZE=20 pylab.figure() pylab.hist(hist, bins=50) pylab.xlabel('Number of weeks of impact delay', fontsize=LABELFONTSIZE) pylab.ylabel('Number of bugs', fontsize=LABELFONTSIZE) ax = pylab.axes() for xlabel_i in ax.get_xticklabels(): xlabel_i.set_fontsize(AXISFONTSIZE) for ylabel_i in ax.get_yticklabels(): ylabel_i.set_fontsize(AXISFONTSIZE) img = 'histograph.png' pylab.savefig(img) print "Histogram output to %s" % img pylab.figure() pylab.plot(vuln) pylab.xlabel('Day between January 2006 and December 2008', fontsize=LABELFONTSIZE) pylab.ylabel('Number of bugs', fontsize=LABELFONTSIZE) pylab.axis([0, 365*3, 0, 16]) ax = pylab.axes() for xlabel_i in ax.get_xticklabels(): xlabel_i.set_fontsize(AXISFONTSIZE) for ylabel_i in ax.get_yticklabels(): ylabel_i.set_fontsize(AXISFONTSIZE) img = 'vuln-window.png' pylab.savefig(img) print "Line graph output to %s" % img