Gradient descent from scratch with Python
Based on Joel Grus's book Data Science from Scratch.
import math
import random
from typing import Callable, List

import altair as alt
import pandas as pd
Vector = List[float]
Vector
def add(vector1: Vector, vector2: Vector) -> Vector:
    assert len(vector1) == len(vector2)
    return [v1 + v2 for v1, v2 in zip(vector1, vector2)]
def scalar_multiply(c: float, vector: Vector) -> Vector:
    return [c * v for v in vector]
def dot(vector1: Vector, vector2: Vector) -> float:
    assert len(vector1) == len(vector2)
    return sum(v1 * v2 for v1, v2 in zip(vector1, vector2))
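A quick sanity check on the helpers (the test values here are arbitrary):
add([1, 2], [3, 4]), scalar_multiply(2, [1, 2]), dot([1, 2], [3, 4])  # ([4, 6], [2, 4], 11)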
def sum_of_squares(v: Vector) -> float:
    return dot(v, v)

def magnitude(v: Vector) -> float:
    return math.sqrt(sum_of_squares(v))

def subtract(vector1: Vector, vector2: Vector) -> Vector:
    assert len(vector1) == len(vector2)
    return [v1 - v2 for v1, v2 in zip(vector1, vector2)]

def distance(vector1: Vector, vector2: Vector) -> float:
    return magnitude(subtract(vector1, vector2))
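magnitude and distance should reproduce the 3-4-5 right triangle:
magnitude([3, 4]), distance([0, 0], [3, 4])  # (5.0, 5.0)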
def vector_sum(vectors: List[Vector]) -> Vector:
    assert vectors
    vector_length = len(vectors[0])
    assert all(len(v) == vector_length for v in vectors)
    sums = [0] * vector_length
    for vector in vectors:
        sums = add(sums, vector)
    return sums
def vector_mean(vectors: List[Vector]) -> Vector:
    n = len(vectors)
    return scalar_multiply(1/n, vector_sum(vectors))
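Another small check (arbitrary vectors): vector_sum adds elementwise, and vector_mean divides that sum by the count:
vector_sum([[1, 2], [3, 4], [5, 6]]), vector_mean([[1, 2], [3, 4], [5, 6]])  # ([9, 12], [3.0, 4.0])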
def partial_difference_quotient(
    f: Callable[[Vector], float], v: Vector, i: int, h: float
) -> float:
    """Estimate the i-th partial derivative of f at v."""
    w = [v_j + (h if j == i else 0) for j, v_j in enumerate(v)]
    return (f(w) - f(v)) / h

def estimate_gradient(
    f: Callable[[Vector], float],
    v: Vector,
    h: float = 0.0001
) -> Vector:
    return [
        partial_difference_quotient(f, v, i, h)
        for i in range(len(v))
    ]
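As a check (the test point is arbitrary), the numerical estimate should land close to the analytic gradient of sum_of_squares, which is 2 * v:
w = [1.0, 2.0, 3.0]
estimate_gradient(sum_of_squares, w)  # roughly [2.0, 4.0, 6.0]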
v = [random.uniform(-10, 10) for _ in range(3)]
v
def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Return v moved one step along the gradient, scaled by step_size."""
    step = scalar_multiply(step_size, gradient)
    return add(v, step)
v, gradient_step(v, [1, 1, 1], step_size=0.1)
def sum_of_squares_gradient(v: Vector) -> Vector:
    return [2 * v_i for v_i in v]
sum_of_squares_gradient(v)
rows = []
for epoch in range(1000):
    grad = sum_of_squares_gradient(v)
    v = gradient_step(v, grad, -0.01)  # step against the gradient, toward the minimum
    rows.append({'x': v[0], 'y': v[1], 'z': v[2]})
steps = pd.DataFrame(rows)  # DataFrame.append was removed in pandas 2.0, so build from a list
steps
alt.Chart(steps.reset_index().melt(id_vars='index')).mark_point().encode(
alt.X('index:Q'), alt.Y('value:Q'), alt.Color('variable:N')
).properties(title='Gradient steps')
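After 1000 steps v should sit essentially at the minimum, the origin; the tolerance below is a loose, arbitrary choice:
assert distance(v, [0, 0, 0]) < 0.001, "v should have converged to the origin"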
inputs = [(x, 20 * x + 5) for x in range(-50, 50)]
inputs[:20]
Next we fit a simple linear model, y = slope * x + intercept, with gradient descent. Differentiating the squared error (predicted - y) ** 2 gives 2 * error * x for the slope and 2 * error for the intercept:
def linear_gradient(x: float, y: float, theta: Vector) -> Vector:
    slope, intercept = theta
    predicted = slope * x + intercept
    error = predicted - y  # the loss is error ** 2
    return [2 * error * x, 2 * error]  # its gradient w.r.t. slope and intercept
x, y = inputs[0]
theta = [random.uniform(-1, 1), random.uniform(-1, 1)]
x, y, theta, linear_gradient(x, y, theta)
theta = [random.uniform(-1, 1), random.uniform(-1, 1)]
learning_rate = 0.001
for epoch in range(20):
    grad = vector_mean([linear_gradient(x, y, theta) for x, y in inputs])
    theta = gradient_step(theta, grad, -learning_rate)
    print(epoch, theta)
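Twenty epochs are not enough for the intercept to settle. Running the same loop for longer (the epoch count and tolerances below are illustrative choices) should recover the true parameters, slope 20 and intercept 5:
theta = [random.uniform(-1, 1), random.uniform(-1, 1)]
for epoch in range(5000):
    grad = vector_mean([linear_gradient(x, y, theta) for x, y in inputs])
    theta = gradient_step(theta, grad, -learning_rate)

slope, intercept = theta
assert 19.9 < slope < 20.1    # close to the true slope, 20
assert 4.9 < intercept < 5.1  # close to the true intercept, 5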