# The third kind of thing, an object with methods:

from basics import *
import math


class Point:
    """A point in the plane with coordinates ``x`` and ``y``."""

    def __init__(self, x, y):
        """Store the two coordinates on the new instance."""
        self.x = x
        self.y = y

    def distance(self, other):
        """Return the Euclidean distance from this point to ``other``.

        ``other`` only needs ``x`` and ``y`` attributes.
        """
        return math.sqrt((self.x - other.x) ** 2 + (self.y - other.y) ** 2)

    def __str__(self):
        """Return the point formatted as ``<x,y>``."""
        return ("<%s,%s>" % (self.x, self.y))

# every method has first argument self
# make new instances by using the class name as a function
# the initializer is called __init__ and there are other special names:
# instances are printed with __str__
# http://docs.python.org/reference/datamodel.html#special-method-names


def test_point():
    """Check Point's string form and a 3-4-5 triangle distance."""
    assert_equal("<3,4>", str(Point(3,4)))
    assert_equal(5, Point(0,0).distance(Point(3,4)))

test_point()

# N-dimensional? Lists!
# zero based,
# [start : end : step]
# can be negative
# indexes between elements
# too large is error


class NdPoint:
    """A point with an arbitrary number of coordinates.

    ``distance1`` .. ``distance4`` all compute the same Euclidean distance;
    they exist to show four different ways of writing the loop.
    """

    def __init__(self, coordinates):
        """Keep a private copy of the ``coordinates`` sequence."""
        self.coordinates = coordinates[:] # Guard against mutation!

    def __str__(self):
        """Return the coordinates formatted as ``<c1,c2,...>``."""
        return "<"+ ",".join(map(str,self.coordinates)) + ">"

    def dimensionality(self):
        """Return the number of coordinates."""
        return len(self.coordinates)

    def same_dimension(self, other):
        """Return True when ``other`` has as many coordinates as this point."""
        return self.dimensionality() == other.dimensionality()

    def distance1(self, other):
        """Euclidean distance to ``other``; AssertionError on size mismatch."""
        # with a for loop over indexes
        assert self.same_dimension(other)
        sum_of_squared_differences = 0
        for i in range(0, self.dimensionality()):
            sum_of_squared_differences += (self.coordinates[i] - other.coordinates[i]) ** 2
        return math.sqrt(sum_of_squared_differences)

    def distance2(self, other):
        """Euclidean distance to ``other``; AssertionError on size mismatch."""
        # with a for loop over item pairs
        assert self.same_dimension(other)
        sum_of_squared_differences = 0
        for current_pair in zip(self.coordinates, other.coordinates):
            sum_of_squared_differences += (current_pair[0] - current_pair[1]) ** 2
        return math.sqrt(sum_of_squared_differences)

    def distance3(self, other):
        """Euclidean distance to ``other``; AssertionError on size mismatch."""
        # with unpacked item pairs
        assert self.same_dimension(other)
        sum_of_squared_differences = 0
        for x1,x2 in zip(self.coordinates, other.coordinates):
            sum_of_squared_differences += (x1 - x2) ** 2
        return math.sqrt(sum_of_squared_differences)

    def distance4(self, other):
        """Euclidean distance to ``other``; AssertionError on size mismatch."""
        # with a list comprehension
        assert self.same_dimension(other)
        return math.sqrt(sum([(x-y)**2 for x,y in zip(self.coordinates, other.coordinates)]))

    def distance(self, other):
        """Return the Euclidean distance to ``other`` (delegates to distance4)."""
        return self.distance4(other)


def test_ndpoint():
    """Exercise NdPoint formatting and every distance implementation."""
    p358 = NdPoint([3,5,8])
    assert_equal("<3,5,8>", str(p358))
    zero = NdPoint([0,0,0])
    assert_equal(0, zero.distance(zero))
    assert_equal(0, p358.distance(p358))
    assert_equal(5, NdPoint([0,0]).distance(NdPoint([3,4])))
    for dist in [NdPoint.distance1, NdPoint.distance2,
                 NdPoint.distance3, NdPoint.distance4]:
        assert_equal(5, dist(zero, NdPoint([0,3,4])))
        assert_equal(5, dist(zero, NdPoint([3,4,0])))
        assert_equal(5, dist(NdPoint([1,1,1]), NdPoint([1,4,5])))
        assert_equal(5, dist(NdPoint([1,1,1]), NdPoint([4,5,1])))
        assert_equal(25/3.0, dist(zero, NdPoint([3,4,20/3.0])))
    try:
        NdPoint([1,2,3]).distance(NdPoint([1,2,3,4]))
    except AssertionError:
        pass
    else:
        # This check lives in the else clause on purpose: inside the try
        # body its own AssertionError would be caught by the handler above
        # and the test could never fail.
        assert False, 'should not get distance between diff. dim. points'

test_ndpoint()

# Sparse feature sets: Dictionary


class SparseFeatureVector:
    """A set of feature names, stored as the keys of a dictionary."""

    def __init__(self, features):
        """Record every name in ``features``; duplicates collapse to one key."""
        # takes a list of arbitrary feature names
        self.sfv = {} # a dictionary
        for feature in features:
            self.sfv[feature] = True

    def __str__(self):
        """Return the string form of the dictionary's keys."""
        return str(self.sfv.keys())

    def __len__(self):
        """Return the number of distinct features."""
        return len(self.sfv)

    def __contains__(self, item):
        """Return True when ``item`` is one of the features."""
        return item in self.sfv

    def __iter__(self):
        """Iterate over the feature names."""
        return self.sfv.__iter__()

    def __eq__(self, other):
        """Two vectors are equal when they hold the same features.

        Returns NotImplemented for operands that are not
        SparseFeatureVector instances, so that comparing against an
        unrelated object does not raise AttributeError.
        """
        if not isinstance(other, SparseFeatureVector):
            return NotImplemented
        return self.sfv == other.sfv

    def __ne__(self, other):
        """Inverse of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``, so it is spelled
        out here to keep the two operators consistent.
        """
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def intersection1(self, other):
        """Return the features shared with ``other``, using an explicit loop."""
        result = []
        for item in self.sfv.keys(): # .keys() is default, so redundant.
            if item in other.sfv:
                result.append(item)
        return SparseFeatureVector(result)

    def intersection2(self, other):
        """Return the features shared with ``other``, using a comprehension."""
        return SparseFeatureVector([x for x in self if x in other])

    def intersection(self, other):
        """Return the features shared with ``other`` (delegates to intersection2)."""
        return self.intersection2(other)

    def distance(self, other):
        """Return 1 - |A & B| / (sqrt(|A|) * sqrt(|B|)).

        The result is 0 for identical non-empty vectors and 1 for vectors
        with no shared feature. When a vector is empty the quotient is
        undefined; by convention two empty vectors are at distance 0.0 and
        an empty vector is at distance 1.0 from any non-empty one.
        """
        denominator = math.sqrt(len(self)) * math.sqrt(len(other))
        if denominator == 0:
            # At least one side is empty: avoid ZeroDivisionError.
            if len(self) == 0 and len(other) == 0:
                return 0.0
            return 1.0
        return 1-(len(self.intersection(other)) / denominator)


def assert_near(expected, observed, epsilon=1e-5, message=""):
    """Assert that ``near(expected, observed, epsilon)`` holds, then print "ok.".

    ``near`` is not defined in this file; it is expected to come from the
    star import of ``basics``.
    """
    assert near(expected, observed, epsilon), ("Expected %s within %s of %s: %s" % (expected, epsilon, observed, message))
    # Parenthesised form prints the same text on Python 2 and also parses
    # on Python 3.
    print("ok.")


def test_sfv():
    """Exercise SparseFeatureVector intersection and distance."""
    a = SparseFeatureVector(list("the quick brown fox")) # coercion -> letters
    # bc ef hi k no qr tu wx _ # 16 letters
    b = SparseFeatureVector(list("jumps over the lazy dog"))
    # a de gh j lm op rstuv yz_ # 18 letters
    # overlap = 7
    assert_near(a.distance(b), b.distance(a))
    assert_near(0, a.distance(a))
    assert_near(0, b.distance(b))
    for intersector in [SparseFeatureVector.intersection1,
                        SparseFeatureVector.intersection2]:
        assert_equal(SparseFeatureVector(list(" ehtoru")), intersector(a, b))
        assert_equal(SparseFeatureVector(list(" ehtoru")), intersector(b, a))
    assert_near(1-(7/(4*math.sqrt(18))), a.distance(b))
    assert_near(1, SparseFeatureVector(["foo", "bar", "baz"]).
                distance(SparseFeatureVector(["fifi", "quux", "zeb"])))
    assert_near(0.5, SparseFeatureVector(["foo","bar"]).
                distance(SparseFeatureVector(["foo","baz"])))

test_sfv()