My previous two posts covered the theory of gradient descent and its matrix form, the Normal Equations. This post implements both in code to deepen the understanding. The data used in the code comes from the Stanford open course website.
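As a quick recap of the standard formulas the code implements (with $m$ training examples, hypothesis $h_\theta(x) = \theta^T x$ and learning rate $\alpha$), each gradient-descent step updates every component of $\theta$ simultaneously:

$$\theta_j := \theta_j - \frac{\alpha}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$$

and the Normal Equations give the closed-form solution directly:

$$\theta = (X^T X)^{-1} X^T y$$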

Preface

For scientific computing, the usual first choice is MATLAB. But I find MATLAB bloated, and as a programming language it is awkward for anything beyond matrix computation, so I dropped it and embraced Python instead. An equation circulates online: numpy + scipy + matplotlib = matlab. numpy, scipy, and matplotlib are all open-source Python libraries; combined, they can fully replace MATLAB, and they also offer excellent extensibility (the Python world has countless high-quality open-source packages). For more details, see here.

Code

The Python implementation of gradient descent is as follows:

import numpy as np
import matplotlib.pyplot as plt


#hypothesis: h(x) = theta . x
def H(theta,x):
    return theta.dot(x)

#load the training data and prepend a column of ones (the intercept term)
def Read():
    x = np.loadtxt('ex3x.dat')
    y = np.loadtxt('ex3y.dat')
    m = len(x)
    t = np.ones((m,1))
    x = np.concatenate((t,x),axis=1)
    return (x,y,m)


#one batch gradient-descent update of theta
def cal(alpha,theta,x,y,m):
    n = x.shape[1]
    newtheta = np.array([0]*n,dtype=float)
    for j in range(0,n):
        count = 0
        for i in range(m):
            count += (H(theta,x[i,:]) - y[i]) * x[i,j]
        newtheta[j] = (theta[j] - alpha / m * count)
    return newtheta

#Cost Function: J(theta) = (X*theta - y)^T (X*theta - y) / (2m)
def J(theta,x,y,m):
    return np.transpose(x.dot(theta)-y).dot(x.dot(theta)-y)/(2*m)

#Normal Equation: theta = (X^T X)^{-1} X^T y
def normalequation(x,y):
    return np.linalg.inv(np.transpose(x).dot(x)).dot(np.transpose(x)).dot(y)

#plot the cost curves for several different learning rates alpha
def gradient_descent():
    (x,y,m) = Read()
    sigma = np.std(x,0)
    mu = np.mean(x,0)
    #feature normalization (z-score scaling)
    x[:,1] = (x[:,1]-mu[1]) / sigma[1]
    x[:,2] = (x[:,2]-mu[2]) / sigma[2]
    n = x.shape[1]

    #run 50 iterations of gradient descent for each learning rate,
    #recording the cost at every step, and plot the resulting curve
    for alpha,style in [(0.01,'b-'),(0.03,'r-'),(0.1,'y-'),
                        (0.3,'b--'),(1,'r--'),(1.3,'y--')]:
        theta = np.array([0]*n,dtype=float)
        j = []
        for i in range(50):
            j.append(J(theta,x,y,m))
            theta = cal(alpha,theta,x,y,m)
        plt.plot(range(50),j,style,label=r'$\alpha = %s$' % alpha)


    plt.xlabel('Number of iterations')
    plt.ylabel('Cost J')
    plt.legend()
    plt.show()


if __name__=='__main__':

    #plot the cost curves
    gradient_descent()
    (x,y,m) = Read()
    #compute theta with the normal equation (on the un-normalized data)
    theta = normalequation(x,y)
    #predict y for a new input [1,1650,3]
    print('predict:',end=' ')
    print(H(theta,np.array([1,1650,3])))
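
A side note on the implementation: the double loop in cal can be written in the matrix form from the previous post, and the explicit inverse in normalequation can be replaced by a linear solve, which is numerically more stable. The sketch below is a minimal illustration of both ideas, not part of the original exercise code; the names cal_vec and normalequation_solve are hypothetical:

import numpy as np

#vectorized batch update: theta <- theta - (alpha/m) * X^T (X*theta - y)
def cal_vec(alpha,theta,x,y,m):
    return theta - (alpha/m) * x.T.dot(x.dot(theta)-y)

#solve (X^T X) theta = X^T y without forming an explicit inverse
def normalequation_solve(x,y):
    return np.linalg.solve(x.T.dot(x),x.T.dot(y))

Given the same inputs, both should match the loop-based cal and normalequation above up to floating-point error.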

Results

(Figure: parameter changes over the iterations for the different learning rates)
