If you are not familiar with the concept of Linear Regression, please refer to Andrew Ng's Machine Learning course, which has a dedicated unit on it; I use the data from the corresponding exercise (ex1data1.txt) for the code here. The other material I consulted heavily is the book TensorFlow for Machine Intelligence. I have no intention of infringing any copyright, so the credit goes to those sources, and you should seek their permission if you want to use the code posted here for commercial purposes.
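As a quick recap of the standard setup (my own summary, not quoted from either source): with m training examples, the code below fits a single weight W and a bias b by minimizing the mean squared error, which is exactly what the calc() and loss() functions compute. Andrew Ng's notes divide the cost by 2m rather than m; the extra factor only rescales the gradient and does not change where the minimum is.

$$\hat{y}^{(i)} = W x^{(i)} + b, \qquad J(W, b) = \frac{1}{m} \sum_{i=1}^{m} \left( \hat{y}^{(i)} - y^{(i)} \right)^2$$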
The code follows. I think it is largely self-explanatory (it targets Python 2 and the older graph-based TensorFlow API), so I have kept the extra explanation to a minimum; comments are welcome and I will probably refine it later:
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from matplotlib import style
import cPickle as pickle
import os
# global scope
style.use('ggplot')
trained = False
W = tf.Variable(tf.random_normal([1, 1], mean = 1.0, stddev = 0.5, dtype = tf.float64), name="weights")
b = tf.Variable(0.0, dtype = tf.float64, name="bias")
sess = tf.Session()
def loadData(fileName):
    if not os.path.exists(fileName):
        print("File %s does not exist" % fileName)
        exit()
    dataSet = []
    baseName = os.path.basename(fileName)
    extName = baseName + '.pkl'
    objFileName = os.path.join(os.path.dirname(fileName), extName)
    if os.path.exists(objFileName):
        # a pickled copy already exists, so reuse it instead of re-parsing the text file
        with open(objFileName, 'rb') as f:
            dataSet = pickle.load(f)
    else:
        # parse the comma-separated text file into a list of [x, y] rows
        with open(fileName) as f:
            for l in f.readlines():
                cont = l.strip().split(',')
                data = map(float, cont)
                dataSet.append(data)
        # cache the parsed rows in binary pickle format for the next run
        with open(objFileName, 'wb') as f:
            pickle.dump(dataSet, f, True)
    return dataSet
def dispData(dataSet):
    dataMat = np.mat(dataSet)
    x = dataMat[:, 0]
    Y = dataMat[:, 1]
    plt.scatter(x, Y)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.show()
def calc(X):
    # the linear hypothesis: X * W + b
    return tf.matmul(X, W) + b
def train(fileName='ex1data1.txt', trainSteps=1000):
    global trained
    trained = True

    def inputs(fileName):
        # the last column is the target; everything before it is the feature(s)
        dataSet = loadData(fileName)
        dataMat = np.mat(dataSet)
        X = dataMat[:, :-1]
        Y = dataMat[:, -1]
        return X, Y

    def loss(X, Y):
        # mean squared error between the predictions and the targets
        Y_ = calc(X)
        return tf.reduce_mean(tf.squared_difference(Y, Y_))

    learningRate = 0.01

    def trainHelper(totalLoss):
        return tf.train.GradientDescentOptimizer(learningRate).minimize(totalLoss)

    X, Y = inputs(fileName)
    totalLoss = loss(X, Y)
    trainOp = trainHelper(totalLoss)
    # initialize variables once the graph is built
    # (tf.initialize_all_variables is deprecated in favor of tf.global_variables_initializer)
    sess.run(tf.global_variables_initializer())
    # no input queues are used here, so the coordinator and queue runners are effectively no-ops
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for step in range(trainSteps):
        sess.run([trainOp])
        # print("loss is ", totalLoss.eval(session=sess))
    coord.request_stop()
    coord.join(threads)
    # should save the model
def inference(x):
    global trained
    if not trained:
        train()
    ret = sess.run(calc(x))
    return ret
def plotRegLine(dataSet):
    dataMat = np.mat(dataSet)
    X = dataMat[:, :-1]
    Y = dataMat[:, -1]
    # evaluate the fitted line at the smallest and largest x values
    dotsX = np.vstack((X.min(), X.max()))
    dotsY = sess.run(calc(dotsX))
    plt.scatter(X, Y)
    plt.plot(dotsX, dotsY)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.show()
if __name__ == '__main__':
    dataSet = loadData('ex1data1.txt')
    # dispData(dataSet)
    # print inference(tf.to_double([[10.0]]))
    train()
    plotRegLine(dataSet)
    sess.close()
Here is the resulting figure (the training data as a scatter plot, with the fitted regression line drawn through it):
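For completeness, here is a minimal sketch (not part of the script above) of how the inference() helper could be called to predict a single value. The input shape [[10.0]] and the float64 dtype are assumptions chosen to match what calc() expects, and the call has to happen before sess.close():

# Hypothetical usage sketch: predict y for x = 10.0 with the trained model.
# Assumes train() has already run (inference() triggers it otherwise) and the
# session has not been closed yet.
sample = np.mat([[10.0]], dtype=np.float64)  # shape (1, 1), matching tf.matmul(X, W)
prediction = inference(sample)
print("prediction for x = 10.0: %s" % prediction)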