import numpy as np
from scipy.optimize import minimize
from tqdm.notebook import tqdm
from Kernel_FeatureMaps import *
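
# `Kernel_FeatureMaps` is not shown in this file; it is assumed to expose `Kernel` and
# `FeatureMap` namespaces (e.g. `Kernel.Linear`, `FeatureMap.Linear`) used as defaults
# below. The commented sketch here only illustrates the assumed interface:
#
#     class Kernel:
#         @staticmethod
#         def Linear(A, B):
#             # K[i, j] = <A[i], B[j]>
#             return A @ B.T
#
#     class FeatureMap:
#         @staticmethod
#         def Linear(X):
#             # For the linear kernel, phi(x) = x.
#             return X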

class SVM(object):
    """
    ## Author: Hua Yao (hy2632@columbia.edu)
    ## Description:
        SVM binary classifier trained on a random sample batch by optimizing the dual
        Lagrangian program with the SMO algorithm.
        Input X is row-normalized to unit length to suit the kernelized version.
        The regularization parameter C is set to `np.inf` so that b has a simple
        closed-form solution.
    ## Reference:
        [CS229 - Kernel Methods and SVM](http://cs229.stanford.edu/notes2020fall/notes2020fall/cs229-notes3.pdf)

    ---
    Parameters:
    ---------
        X: (N, d)
        Y: (N,), labels in {-1, +1}
        kernel: kernel used, linear/softmax
        featuremap: feature map corresponding to the kernel used
        batch_size: int, also denoted as n
        C: box constraint of the soft margin, alpha_i in [0, C]. Set to np.inf
           (no regularization), because the closed form of b is messy under regularization.
        tol: tolerance (default 1e-6) deciding when to stop training

    Intermediate parameters:
    ---------
        x: (n, d), random batch drawn from X
        y: (n,)
        phi_x: (n, m), feature map of x
        M: (n, n), M[i, j] = y_i y_j * K(x_i, x_j), the Hadamard product of y y^T and K

    Learned parameters:
    ---------
        alpha: (n,)
        w: (d,)
        b: float
    """
    def __init__(self,
                 X,
                 Y,
                 kernel=Kernel.Linear,
                 featuremap=FeatureMap.Linear,
                 batch_size=64,
                 C=np.inf,
                 tol=1e-6):
        self.N, self.d = X.shape
        # Normalize each sample to unit l2 norm so kernels operate on unit vectors.
        self.X = X / np.linalg.norm(X, axis=1, keepdims=True)
        self.Y = Y
        self.kernel = kernel
        self.featuremap = featuremap
        self.n = batch_size
        self.C = C
        self.tol = tol

        # Draw a random batch of n distinct samples from X.
        batch_indices = np.random.choice(np.arange(self.N), self.n, replace=False)
        self.x = self.X[batch_indices]
        self.y = self.Y[batch_indices]
        self.phi_x = self.featuremap(self.x)
        # M[i, j] = y_i y_j * K(x_i, x_j): Hadamard product of y y^T and the Gram matrix.
        self.M = np.outer(self.y, self.y) * self.kernel(self.x, self.x)

        self.alpha = np.ones(self.n)
        self.w = np.zeros(self.d)
        self.b = 0

    def update_alpha(self, random_idx1, random_idx2):
        # Enforce the equality constraint sum_i alpha_i * y_i = 0: with
        # Zeta = -(sum of alpha_k * y_k over all other indices k), we need
        # alpha_1 * y_1 + alpha_2 * y_2 = Zeta; since y_1 in {-1, +1}, 1/y_1 = y_1.
        Zeta = -np.sum(self.alpha * self.y) + (
            self.alpha * self.y)[random_idx1] + (self.alpha * self.y)[random_idx2]
        self.alpha[random_idx1] = (Zeta - self.alpha[random_idx2] *
                                   self.y[random_idx2]) * self.y[random_idx1]

    def dual_obj(self, alpha):
        # Dual objective W(alpha) = sum_i alpha_i
        #                           - 0.5 * sum_{i,j} alpha_i alpha_j y_i y_j K(x_i, x_j).
        return np.sum(alpha) - np.sum(0.5 * self.M * np.outer(alpha, alpha))
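
    # `fit` runs a simplified SMO loop on the dual program
    #     max_alpha  sum_i alpha_i - 0.5 * sum_{i,j} alpha_i alpha_j y_i y_j K(x_i, x_j)
    #     s.t.       0 <= alpha_i <= C  and  sum_i alpha_i * y_i = 0,
    # repeatedly picking a pair (alpha_1, alpha_2) and optimizing it while the rest stay fixed.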
    def fit(self, iterations=200000):
        prev_val = self.alpha.copy()

        for i in tqdm(range(iterations)):
            # Pick two distinct coordinates to optimize jointly (SMO step).
            random_idx1, random_idx2 = np.random.choice(
                np.arange(0, self.n), 2, replace=False)

            def optimizeWRTa2(a2):
                # Negated dual objective as a function of alpha_2 alone, with alpha_1
                # adjusted to keep sum_i alpha_i * y_i = 0.
                self.alpha[random_idx2] = a2
                self.update_alpha(random_idx1, random_idx2)
                return -self.dual_obj(self.alpha)

            a2 = self.alpha[random_idx2]
            res = minimize(optimizeWRTa2, a2, bounds=[(0, self.C)])
            a2 = res.x[0]  # res.x is a length-1 array; extract the scalar

            self.alpha[random_idx2] = a2
            self.update_alpha(random_idx1, random_idx2)

            # Every 5 steps, check how much alpha has moved; stop early once it stabilizes.
            if i % 5 == 1:
                if np.sum(np.abs(self.alpha - prev_val)) < self.tol:
                    print(
                        f">> Optimized on the batch, step {i}. 5 steps Δalpha:{np.sum(np.abs(self.alpha - prev_val))}"
                    )
                    break  # leave the loop so that w and b below are still computed
                elif i % 5000 == 1:
                    print(
                        f">> Optimizing, step {i}. Δalpha:{np.sum(np.abs(self.alpha - prev_val))}"
                    )
                prev_val = self.alpha.copy()
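
        # Recover the primal parameters from the dual solution (see the CS229 notes):
        #     w = sum_i alpha_i * y_i * phi(x_i)
        # and, for the hard-margin case (C = np.inf),
        #     b = -(max_{i: y_i = -1} w . phi(x_i) + min_{i: y_i = +1} w . phi(x_i)) / 2.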
        self.w = np.dot(self.alpha * self.y, self.phi_x)
        self.b = (np.max(np.dot(self.phi_x, self.w)[self.y == -1]) +
                  np.min(np.dot(self.phi_x, self.w)[self.y == 1])) * -0.5

    def predict(self, X_val):
        # Kernelized decision function: sign(sum_i alpha_i * y_i * K(x_val, x_i) + b).
        X_val_normed = X_val / np.linalg.norm(X_val, axis=1, keepdims=True)
        return np.sign(
            np.dot(self.kernel(X_val_normed, self.x), self.alpha * self.y) + self.b)

    def score(self, X_val, y_val):
        prediction = self.predict(X_val)
        return np.mean(prediction == y_val)
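
# A minimal usage sketch (not part of the original class): trains the classifier on a
# synthetic, linearly separable problem, assuming `Kernel.Linear` / `FeatureMap.Linear`
# are provided by `Kernel_FeatureMaps` as in the defaults above.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    # Two Gaussian blobs with labels in {-1, +1}.
    X_pos = rng.randn(200, 2) + np.array([2.0, 2.0])
    X_neg = rng.randn(200, 2) + np.array([-2.0, -2.0])
    X_all = np.vstack([X_pos, X_neg])
    Y_all = np.concatenate([np.ones(200), -np.ones(200)])

    svm = SVM(X_all, Y_all, kernel=Kernel.Linear, featuremap=FeatureMap.Linear,
              batch_size=64)
    svm.fit(iterations=20000)
    print("train accuracy:", svm.score(X_all, Y_all))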