Wednesday, October 26, 2016

Linear Regression with TensorFlow

If you are not familiar with the concept of linear regression, please refer to Andrew Ng's Machine Learning course, which has a dedicated unit on it; the data I use here comes from the corresponding exercise. The other material I consulted intensively is the book TensorFlow for Machine Intelligence. I have no intention of infringing any copyright, so all credit goes to those sources, and please seek their permission if you intend to use the code posted here for commercial purposes.

The code is as follows. I think it is largely self-explanatory, so I have deliberately avoided further explanation; comments are welcome, and I will probably refine it later:


import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from matplotlib import style
import cPickle as pickle
import os


# global scope
style.use('ggplot')

trained = False

W = tf.Variable(tf.random_normal([1, 1], mean = 1.0, stddev = 0.5, dtype = tf.float64), name="weights")
b = tf.Variable(0.0, dtype = tf.float64, name="bias")

sess = tf.Session()

def loadData(fileName):
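    # parse the comma-separated data file into a list of [x, y] rows; the parsed result
    # is cached as a pickle next to the original file so later runs can skip the parsing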
    if not os.path.exists(fileName):
        print("Non-exist file %s" % fileName)
        exit()

    dataSet = []
    baseName = os.path.basename(fileName)
    extName = baseName + '.pkl'
    objFileName = os.path.join(os.path.dirname(fileName), extName)
    if os.path.exists(objFileName):
        # the cached pickle is written in binary mode below, so read it back in binary mode too
        with open(objFileName, 'rb') as f:
            dataSet = pickle.load(f)
    else:
        with open(fileName) as f:
            for l in f.readlines():
                cont = l.strip().split(',')
                data = map(float, cont)
                dataSet.append(data)
        with open(objFileName, 'wb') as f:
            pickle.dump(dataSet, f, True)

    return dataSet

def dispData(dataSet):
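    # scatter-plot the raw data points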
    dataMat = np.mat(dataSet)
    x = dataMat[:, 0]
    Y = dataMat[:, 1]

    plt.scatter(x, Y)

    plt.xlabel('X axis')
    plt.ylabel('Y axis')

    plt.show()

def calc(X):
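    # hypothesis of the linear model: y_hat = X * W + b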
    return tf.matmul(X, W) + b

def train(fileName = 'ex1data1.txt', trainSteps = 1000):
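    # fit W and b with batch gradient descent (learning rate 0.01) on the mean squared error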
    global trained

    trained = True

    def inputs(fileName):
        dataSet = loadData(fileName)
        dataMat = np.mat(dataSet)
        X = dataMat[:, :-1]
        Y = dataMat[:, -1]
        return X, Y

    def loss(X, Y):
        Y_ = calc(X)
        return tf.reduce_mean(tf.squared_difference(Y, Y_))

    learningRate = 0.01
    def trainHelper(totalLoss):
        return tf.train.GradientDescentOptimizer(learningRate).minimize(totalLoss)

    # initialize the model variables W and b before training
    sess.run(tf.initialize_all_variables())

    X, Y = inputs(fileName)
    totalLoss = loss(X, Y)

    trainOp = trainHelper(totalLoss)

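    # a coordinator and queue runners are not strictly needed here (the data is already
    # in memory as constants), but starting them is harmless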
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess = sess, coord = coord)
    
    for step in range(trainSteps):
        sess.run([trainOp])
        # print("loss is ", totalLoss.eval(session = sess))

    coord.request_stop()
    coord.join(threads)

    # should save the model

def inference(x):
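    # predict y for the input tensor x, training the model first if it has not been trained yet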
    global trained

    if not trained:
        train()
    
    ret = sess.run(calc(x))
    return ret

def plotRegLine(dataSet):
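    # plot the data together with the fitted regression line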
    dataMat = np.mat(dataSet)
    X = dataMat[:, :-1]
    Y = dataMat[:, -1]

    # evaluate the fitted line at the two endpoints of the data range
    dotsX = np.vstack((X.min(), X.max()))
    dotsY = sess.run(calc(dotsX))

    plt.scatter(X, Y)
    plt.plot(dotsX, dotsY)

    plt.xlabel('X axis')
    plt.ylabel('Y axis')

    plt.show()
  


if __name__ == '__main__':
    dataSet = loadData('ex1data1.txt')
    # dispData(dataSet)
    # print inference(tf.to_double([[10.0]]))
    train()
    plotRegLine(dataSet)

    sess.close()
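
To query the trained model for a single new input, the commented-out line in the __main__ block above hints at the usage. Here is a minimal sketch of my own (not part of the original exercise); it has to run after train() and before sess.close():

predictY = inference(tf.to_double([[10.0]]))  # input must have shape [1, 1] so tf.matmul in calc() accepts it
print("prediction for x = 10.0 is %f" % predictY[0, 0])

inference() returns a NumPy array of shape (1, 1), and it will call train() itself if the model has not been trained yet.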


Here is the resulting figure:

[Figure: scatter plot of the training data with the fitted regression line, as produced by plotRegLine]
