Deep Learning Pearls

Monday, October 31, 2016

Handwritten digits recognition via TensorFlow based on Windows MFC (I) - Load MNist image data

I would like to write a series of posts on how to use TensorFlow for handwritten digit recognition in a Windows MFC application. The procedure is to first train a model on a Linux system such as Ubuntu, then export the model data to Windows, and then build an application that can load the model. The MFC application acts as the front end: it sends an image to the application hosting the model and retrieves the recognition result.

In this post, I will focus on how to load the MNIST image data. Please refer to the following link for details of the file format:
http://yann.lecun.com/exdb/mnist/

I only paste the source code here without further explanation. Please note that I have only tested the code with VC++ 2015, so I am not sure whether it works with other VC++ versions:

Header file:

#pragma once

#include <Windows.h>

// Magic numbers expected in the first header field of an MNIST image file and
// an MNIST label file; CMnistReader compares the byte-swapped header value
// against them when it opens a file.
#define MNIST_MAGIC_IMAGE 0x00000803
#define MNIST_MAGIC_LABEL 0x00000801

// Dimensions of one image record (one unsigned char per pixel, see IMAGE_DATA).
#define IMAGE_HEIGHT 28
#define IMAGE_WIDTH 28
// Size in bytes of one label record (see LABEL_DATA).
#define LABEL_SIZE 1

// Kind of MNIST file a CMnistReader is opened on.
typedef enum tagMNIST_TYPE
{
    IMAGE,  // image file (enumerator value 0)
    LABEL   // label file (enumerator value 1)
} MNIST_TYPE;

// Both structures are byte-packed so that the offset of the trailing pointer
// member equals the size of the integer fields that precede it; the reader
// uses that offset as the position of the first record in the file.
#pragma pack(push, 1)

// Header fields of an image file followed by a pointer member marking where
// the pixel data starts.
struct tagMNIST_IMAGE
{
    int magic;              // compared against MNIST_MAGIC_IMAGE
    int items;              // presumably the number of images in the file
    int rows;               // presumably the image height in pixels
    int cols;               // presumably the image width in pixels
    unsigned char* data;    // placeholder for the start of the pixel data
};
typedef struct tagMNIST_IMAGE MNIST_IMAGE;

// Header fields of a label file followed by a pointer member marking where
// the label data starts.
struct tagMNIST_LABEL
{
    int magic;              // compared against MNIST_MAGIC_LABEL
    int items;              // presumably the number of labels in the file
    unsigned char* label;   // placeholder for the start of the label data
};
typedef struct tagMNIST_LABEL MNIST_LABEL;

#pragma pack(pop)

// One image record: IMAGE_HEIGHT rows of IMAGE_WIDTH one-byte pixels.
using IMAGE_DATA = unsigned char[IMAGE_HEIGHT][IMAGE_WIDTH];
// One label record: LABEL_SIZE bytes.
using LABEL_DATA = unsigned char[LABEL_SIZE];

class CMnistReader
{
public:
CMnistReader(LPCTSTR path, MNIST_TYPE type = IMAGE);
~CMnistReader();

bool operator !()
{
return !m_bInit;
}

bool GetNextImage(IMAGE_DATA img);
bool GetNextLabel(LABEL_DATA lb);

bool GetPrevImage(IMAGE_DATA img);
bool GetPrevLabel(LABEL_DATA lb);

bool GetImage(IMAGE_DATA img, int idx);
bool GetLabel(LABEL_DATA lb, int idx);

private:
MNIST_TYPE m_type;
bool m_bInit;
HANDLE m_hFile;
HANDLE m_hMapFile;
DWORD m_dwFileSize;
unsigned char* m_lpMapAddress;
unsigned char* m_lpCurAddress;
union {
MNIST_IMAGE m_image;
MNIST_LABEL m_label;
};

};

class CBitmapConverter
{
public:
CBitmapConverter(HDC hdc);
~CBitmapConverter();

bool Convert(IMAGE_DATA data);
HBITMAP GetBmpHandle() { return m_hBitmap; }

private:
HBITMAP m_hBitmap;
unsigned char* m_lpBitmapBits;
};

Source Files:

#include "stdafx.h"

#include <WinSock2.h>
#include "MnistReader.h"

#pragma comment(lib, "Ws2_32.lib")

// Opens `path`, maps it read-only into memory and validates its header.
//
// path: file to open.
// type: IMAGE or LABEL; selects which header layout and magic number apply.
//
// On any failure a message is sent to the debugger output and the object is
// left un-initialised (operator! returns true). Whatever was acquired up to
// that point is released by the destructor.
CMnistReader::CMnistReader(LPCTSTR path, MNIST_TYPE type) :
    m_type(type),
    m_bInit(false),
    m_hFile(INVALID_HANDLE_VALUE),
    m_hMapFile(NULL),
    m_dwFileSize(0),
    m_lpMapAddress(NULL),
    m_lpCurAddress(NULL)
{
    // The file is only read, so allow other readers to open it concurrently.
    m_hFile = CreateFile(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
        FILE_ATTRIBUTE_NORMAL, NULL);

    if (m_hFile == INVALID_HANDLE_VALUE)
    {
        OutputDebugString(_T("CreateFile() Failed"));
        return;
    }

    // The on-disk header consists of the integer fields only; the trailing
    // pointer member of the structures is not part of the file.
    const DWORD dwHeaderSize = (type == IMAGE)
        ? static_cast<DWORD>(offsetof(MNIST_IMAGE, data))
        : static_cast<DWORD>(offsetof(MNIST_LABEL, label));

    const DWORD dwFileSize = GetFileSize(m_hFile, NULL);
    if (dwFileSize == INVALID_FILE_SIZE || dwFileSize < dwHeaderSize)
    {
        OutputDebugString(_T("Invalid File Size"));
        return;
    }

    m_hMapFile = CreateFileMapping(m_hFile, NULL, PAGE_READONLY,
        0, dwFileSize, NULL);

    if (m_hMapFile == NULL)
    {
        OutputDebugString(_T("CreateFileMapping() Failed"));
        return;
    }

    m_lpMapAddress = static_cast<unsigned char*>(
        MapViewOfFile(m_hMapFile, FILE_MAP_READ, 0, 0, dwFileSize));

    if (m_lpMapAddress == NULL)
    {
        OutputDebugString(_T("MapViewOfFile() Failed"));
        return;
    }

    // Publish size and cursor only once the view exists, so that a failed
    // object never pairs a non-zero size with a NULL mapping address.
    m_dwFileSize = dwFileSize;
    m_lpCurAddress = m_lpMapAddress + dwHeaderSize;

    switch (type)
    {
    case IMAGE:
        // Header fields are stored in network byte order; convert to host order.
        memcpy(&m_image, m_lpMapAddress, dwHeaderSize);
        m_image.magic = ntohl(m_image.magic);
        m_image.items = ntohl(m_image.items);
        m_image.rows = ntohl(m_image.rows);
        m_image.cols = ntohl(m_image.cols);
        m_image.data = m_lpCurAddress;

        if (m_image.magic != MNIST_MAGIC_IMAGE)
        {
            OutputDebugString(_T("Invalid Image File Format"));
            return;
        }
        // Records are read in fixed IMAGE_HEIGHT x IMAGE_WIDTH chunks, so any
        // other geometry would be mis-framed.
        if (m_image.rows != IMAGE_HEIGHT || m_image.cols != IMAGE_WIDTH)
        {
            OutputDebugString(_T("Unsupported Image Size"));
            return;
        }
        break;
    case LABEL:
        memcpy(&m_label, m_lpMapAddress, dwHeaderSize);
        m_label.magic = ntohl(m_label.magic);
        m_label.items = ntohl(m_label.items);
        m_label.label = m_lpCurAddress;

        if (m_label.magic != MNIST_MAGIC_LABEL)
        {
            OutputDebugString(_T("Invalid Label File Format"));
            return;
        }
        break;
    default:
        OutputDebugString(_T("Invalid MNIST Type"));
        return;
    }

    m_bInit = true;
}

// Releases whatever the constructor managed to acquire, in reverse order of
// acquisition: the mapped view, the file-mapping object, then the file.
CMnistReader::~CMnistReader()
{
    if (m_lpMapAddress != NULL)
    {
        UnmapViewOfFile(m_lpMapAddress);
    }

    if (m_hMapFile != NULL)
    {
        CloseHandle(m_hMapFile);
    }

    if (INVALID_HANDLE_VALUE != m_hFile)
    {
        CloseHandle(m_hFile);
    }
}

// Copies the image record at the cursor into `img` and advances the cursor by
// one record.
//
// Returns true on success. Returns false and zero-fills `img` when the reader
// is not initialised, was opened on a label file, or fewer than one full
// record remains before the end of the mapped file.
bool CMnistReader::GetNextImage(IMAGE_DATA img)
{
    // m_bInit guards the pointer arithmetic below: after a failed open the
    // mapping pointers are not usable.
    if (m_bInit && m_type == IMAGE)
    {
        const size_t remaining =
            static_cast<size_t>((m_lpMapAddress + m_dwFileSize) - m_lpCurAddress);
        if (remaining >= sizeof(IMAGE_DATA))
        {
            memcpy(img, m_lpCurAddress, sizeof(IMAGE_DATA));
            m_lpCurAddress += sizeof(IMAGE_DATA);
            return true;
        }
    }

    memset(img, 0, sizeof(IMAGE_DATA));
    return false;
}

// Copies the label record at the cursor into `lb` and advances the cursor by
// one record.
//
// Returns true on success. Returns false and fills `lb` with 0xff when the
// reader is not initialised, was opened on an image file, or no full record
// remains before the end of the mapped file.
bool CMnistReader::GetNextLabel(LABEL_DATA lb)
{
    // m_bInit guards the pointer arithmetic below: after a failed open the
    // mapping pointers are not usable.
    if (m_bInit && m_type == LABEL)
    {
        const size_t remaining =
            static_cast<size_t>((m_lpMapAddress + m_dwFileSize) - m_lpCurAddress);
        if (remaining >= sizeof(LABEL_DATA))
        {
            memcpy(lb, m_lpCurAddress, sizeof(LABEL_DATA));
            m_lpCurAddress += sizeof(LABEL_DATA);
            return true;
        }
    }

    memset(lb, 0xff, sizeof(LABEL_DATA));
    return false;
}

// Moves the cursor back by one image record and copies that record into `img`.
//
// Returns true on success. Returns false and zero-fills `img` when the reader
// is not initialised, was opened on a label file, or the cursor is already at
// the first record.
bool CMnistReader::GetPrevImage(IMAGE_DATA img)
{
    if (m_bInit && m_type == IMAGE)
    {
        // Measure how far the cursor is past the first record instead of
        // forming a pointer that may lie before the start of the mapping.
        const unsigned char* first = m_lpMapAddress + offsetof(MNIST_IMAGE, data);
        const size_t consumed = static_cast<size_t>(m_lpCurAddress - first);
        if (consumed >= sizeof(IMAGE_DATA))
        {
            m_lpCurAddress -= sizeof(IMAGE_DATA);
            memcpy(img, m_lpCurAddress, sizeof(IMAGE_DATA));
            return true;
        }
    }

    memset(img, 0, sizeof(IMAGE_DATA));
    return false;
}

// Moves the cursor back by one label record and copies that record into `lb`.
//
// Returns true on success. Returns false and fills `lb` with 0xff when the
// reader is not initialised, was opened on an image file, or the cursor is
// already at the first record.
bool CMnistReader::GetPrevLabel(LABEL_DATA lb)
{
    if (m_bInit && m_type == LABEL)
    {
        // Measure how far the cursor is past the first record instead of
        // forming a pointer that may lie before the start of the mapping.
        const unsigned char* first = m_lpMapAddress + offsetof(MNIST_LABEL, label);
        const size_t consumed = static_cast<size_t>(m_lpCurAddress - first);
        if (consumed >= sizeof(LABEL_DATA))
        {
            m_lpCurAddress -= sizeof(LABEL_DATA);
            memcpy(lb, m_lpCurAddress, sizeof(LABEL_DATA));
            return true;
        }
    }

    memset(lb, 0xff, sizeof(LABEL_DATA));
    return false;
}

// Copies the image record with zero-based index `idx` into `img` without
// moving the sequential cursor.
//
// Returns true on success. Returns false and zero-fills `img` (the same
// failure convention as GetNextImage) when the reader is not initialised, was
// opened on a label file, `idx` is outside [0, items), or the record would
// extend past the end of the mapped file.
bool CMnistReader::GetImage(IMAGE_DATA img, int idx)
{
    if (m_bInit && m_type == IMAGE && idx >= 0 && idx < m_image.items)
    {
        // 64-bit arithmetic so that a large index cannot wrap the offset.
        const unsigned long long offset = offsetof(MNIST_IMAGE, data) +
            static_cast<unsigned long long>(idx) * sizeof(IMAGE_DATA);
        if (offset + sizeof(IMAGE_DATA) <= m_dwFileSize)
        {
            memcpy(img, m_lpMapAddress + offset, sizeof(IMAGE_DATA));
            return true;
        }
    }

    memset(img, 0, sizeof(IMAGE_DATA));
    return false;
}

// Copies the label record with zero-based index `idx` into `lb` without
// moving the sequential cursor.
//
// Returns true on success. Returns false and fills `lb` with 0xff (the same
// failure convention as GetNextLabel) when the reader is not initialised, was
// opened on an image file, `idx` is outside [0, items), or the record would
// extend past the end of the mapped file.
bool CMnistReader::GetLabel(LABEL_DATA lb, int idx)
{
    if (m_bInit && m_type == LABEL && idx >= 0 && idx < m_label.items)
    {
        // 64-bit arithmetic so that a large index cannot wrap the offset.
        const unsigned long long offset = offsetof(MNIST_LABEL, label) +
            static_cast<unsigned long long>(idx) * sizeof(LABEL_DATA);
        if (offset + sizeof(LABEL_DATA) <= m_dwFileSize)
        {
            memcpy(lb, m_lpMapAddress + offset, sizeof(LABEL_DATA));
            return true;
        }
    }

    memset(lb, 0xff, sizeof(LABEL_DATA));
    return false;
}

// Creates a top-down, 8-bit-per-pixel DIB section of IMAGE_WIDTH x IMAGE_HEIGHT
// pixels with a grey-scale palette.
//
// hdc: device context handed to CreateDIBSection.
//
// If creation fails, m_hBitmap stays NULL and Convert() reports failure.
CBitmapConverter::CBitmapConverter(HDC hdc) :
    m_hBitmap(NULL),
    m_lpBitmapBits(NULL)
{
    // An 8-bpp DIB is palettised: the header must be followed by a colour
    // table. BITMAPINFO only has room for a single entry, so use a local
    // layout with the full 256 entries.
    struct
    {
        BITMAPINFOHEADER bmiHeader;
        RGBQUAD bmiColors[256];
    } bi;

    ZeroMemory(&bi, sizeof(bi));
    bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bi.bmiHeader.biWidth = IMAGE_WIDTH;
    bi.bmiHeader.biHeight = -IMAGE_HEIGHT;  // negative height: top-down rows
    bi.bmiHeader.biPlanes = 1;
    bi.bmiHeader.biBitCount = 8;
    bi.bmiHeader.biCompression = BI_RGB;
    bi.bmiHeader.biClrUsed = 256;

    // Identity grey ramp: pixel value v is shown as grey level v.
    for (int i = 0; i < 256; ++i)
    {
        const BYTE level = static_cast<BYTE>(i);
        bi.bmiColors[i].rgbRed = level;
        bi.bmiColors[i].rgbGreen = level;
        bi.bmiColors[i].rgbBlue = level;
    }

    m_hBitmap = CreateDIBSection(hdc, reinterpret_cast<BITMAPINFO*>(&bi), DIB_RGB_COLORS,
        reinterpret_cast<VOID**>(&m_lpBitmapBits), NULL, 0);
}

// Deletes the DIB section created by the constructor, if there is one.
CBitmapConverter::~CBitmapConverter()
{
    if (m_hBitmap != NULL)
    {
        DeleteObject(m_hBitmap);
    }
}

// Copies one image record row by row into the DIB section's pixel buffer.
//
// data: IMAGE_HEIGHT rows of IMAGE_WIDTH one-byte pixels.
//
// Returns false when there is no pixel buffer to write to (bitmap creation
// failed) or `data` is NULL; returns true after the copy otherwise.
bool CBitmapConverter::Convert(IMAGE_DATA data)
{
    if (m_lpBitmapBits == NULL || data == NULL)
    {
        return false;
    }

    const int width = IMAGE_WIDTH;
    // Destination rows start on 4-byte boundaries, so round the row length up
    // to the next multiple of 4.
    const int pitch = (width + 3) & ~3;

    unsigned char* dst = m_lpBitmapBits;
    for (int row = 0; row < IMAGE_HEIGHT; ++row)
    {
        memcpy(dst, data[row], width);
        dst += pitch;
    }

    return true;
}

The running result:

Wednesday, October 26, 2016

Linear Regression with TensorFlow

If you don't know the concept of Linear Regression, please turn to the Machine Learning course taught by Andrew Ng; there is a dedicated unit about it. I use the data from the corresponding exercise for the code here. The other material I consulted intensively is TensorFlow for Machine Intelligence. I have no intention of infringing any copyright, so all credit goes to them, and you should seek their permission if you consider using the code pasted here for commercial purposes.

The code is as follows. I think it is self-explanatory, so I purposely avoid any further explanation. However, comments are welcome, and I will probably refine it later:

import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from matplotlib import style
import cPickle as pickle
import os

# ---- module-level state shared by every function below ----
style.use('ggplot')

# Flipped to True by train(); inference() checks it to decide whether to train.
trained = False

# Model parameters used by calc(): a 1x1 weight matrix and a scalar bias.
W = tf.Variable(
    tf.random_normal([1, 1], mean=1.0, stddev=0.5, dtype=tf.float64),
    name="weights")
b = tf.Variable(0.0, dtype=tf.float64, name="bias")

# One session for the whole script; closed at the end of the main section.
sess = tf.Session()

def loadData(fileName):
    """Load a comma-separated numeric data file as a list of float rows.

    The parsed result is cached next to the data file as ``<basename>.pkl``;
    when that cache already exists it is loaded instead of re-parsing the text.

    Args:
        fileName: path of the text file, one comma-separated sample per line.
            Blank lines are ignored.

    Returns:
        A list of rows, each row being a list of floats.

    Raises:
        SystemExit: if ``fileName`` does not exist (after printing a message).
        ValueError: if a field cannot be converted to float.
    """
    if not os.path.exists(fileName):
        print("Non-exist file %s" % fileName)
        # Same exception type as exit(), but with a failing status code and
        # without relying on the interactive-only exit() helper.
        raise SystemExit(1)

    objFileName = os.path.join(os.path.dirname(fileName),
                               os.path.basename(fileName) + '.pkl')

    if os.path.exists(objFileName):
        # The cache is written in binary mode, so it must be read in binary
        # mode too (text mode corrupts it on Windows and fails on Python 3).
        # NOTE(review): pickle.load can execute arbitrary code; only use cache
        # files this script wrote itself.
        with open(objFileName, 'rb') as f:
            return pickle.load(f)

    dataSet = []
    with open(fileName) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            # A real list (not a lazy map object) so it pickles the same way
            # on Python 2 and Python 3.
            dataSet.append([float(field) for field in line.split(',')])

    with open(objFileName, 'wb') as f:
        pickle.dump(dataSet, f, 1)

    return dataSet

def dispData(dataSet):
    """Show a scatter plot of column 1 of dataSet against column 0."""
    samples = np.mat(dataSet)

    plt.scatter(samples[:, 0], samples[:, 1])
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.show()

def calc(X):
    """Build the model output for X: the matrix product X * W plus the bias b."""
    weighted = tf.matmul(X, W)
    return weighted + b

def train(fileName='ex1data1.txt', trainSteps=1000):
    """Fit W and b to the data in fileName with plain gradient descent.

    Args:
        fileName: data file passed to loadData(); the last column is the
            target, all preceding columns are the inputs.
        trainSteps: number of gradient-descent steps to run.

    Side effects:
        Re-initialises all variables in the global session, runs the training
        steps and, only after they completed, sets the global ``trained`` flag.
    """
    global trained

    def inputs(fileName):
        # Split the samples into inputs (all but last column) and target.
        dataMat = np.mat(loadData(fileName))
        return dataMat[:, :-1], dataMat[:, -1]

    def loss(X, Y):
        # Mean squared error between the targets and the model output.
        Y_ = calc(X)
        return tf.reduce_mean(tf.squared_difference(Y, Y_))

    learningRate = 0.01

    def trainHelper(totalLoss):
        return tf.train.GradientDescentOptimizer(learningRate).minimize(totalLoss)

    sess.run(tf.initialize_all_variables())

    X, Y = inputs(fileName)
    totalLoss = loss(X, Y)
    trainOp = trainHelper(totalLoss)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in range(trainSteps):
            sess.run([trainOp])
            # print("loss is ", totalLoss.eval(session = sess))
    finally:
        # Always stop and join the runner threads, even if a step raised.
        coord.request_stop()
        coord.join(threads)

    # Mark the model as trained only once training really finished, so that a
    # failed run does not make inference() skip training.
    trained = True

    # should save the model

def inference(x):
    """Evaluate the model on x, training it first (with train()'s defaults)
    if the module-level ``trained`` flag is not set yet."""
    if not trained:
        train()

    prediction = calc(x)
    return sess.run(prediction)

def plotRegLine(dataSet):
    """Scatter-plot the samples and draw the model's line over them.

    The line is drawn between the model outputs at the smallest and the
    largest input value found in dataSet.
    """
    samples = np.mat(dataSet)
    features = samples[:, :-1]
    targets = samples[:, -1]

    # Two end points are enough to draw a straight line.
    lineX = np.vstack((min(features), max(features)))
    lineY = sess.run(calc(lineX))

    plt.scatter(features, targets)
    plt.plot(lineX, lineY)
    plt.xlabel('X axis')
    plt.ylabel('Y axis')
    plt.show()


# Script entry point: load the exercise data, train, plot the fitted line and
# release the session.
if __name__ == '__main__':
    samples = loadData('ex1data1.txt')
    # dispData(samples)
    # print inference(tf.to_double([[10.0]]))
    train()
    plotRegLine(samples)

    sess.close()

Here is the figure:

Sunday, October 23, 2016

A formula in Conditional Probability

I am sorry that I am not very familiar with LaTeX, so all the mathematical formulas typed here are ugly.

A formula related to conditional probability is sometimes overlooked, although it is often quite convenient to use. The equation is
P(A, B | C) = P(A | B, C) * P(B | C) (1)

The proof is direct, since
P(A, B | C) = P(A, B, C) / P(C) (2)

and
P(A | B, C) = P(A, B, C) / P(B, C) (3)
P(B | C) = P(B, C) / P(C) (4)

Multiplying the right-hand sides of (3) and (4) cancels P(B, C) and gives P(A, B, C) / P(C), which is exactly the right-hand side of (2), so the formula holds (assuming P(B, C) > 0).

Saturday, October 1, 2016

How to build and test new Op in TensorFlow

I think installing TensorFlow from source code is beneficial. However, if one didn't do so, how can one build and test a new Op in TensorFlow?

To build the Op with only the binary package installed, write a simple Makefile in the directory where the source code resides, and put the following content into it:

# Include directory of the installed TensorFlow package, queried from Python.
# ":=" runs the $(shell ...) call once instead of on every reference.
TF_INC := $(shell python -c "from tensorflow import sysconfig; print(sysconfig.get_include())")

# "all" names an action, not a file, so keep make from looking for a file
# called "all".
.PHONY: all
all:
	g++ -std=c++11 -shared foo_bar.cc -o foo_bar.so -fPIC -I $(TF_INC) -D_GLIBCXX_USE_CXX11_ABI=0

For testing, just write the following Python script:

import tensorflow as tf

import unittest

def test():
    """Load the custom op library and run a single verbose unittest case that
    checks foo_bar([1, 2, 3]) evaluates to [1, 2, 3]."""

    class FooBarTest(unittest.TestCase):

        def testFooBar(self):
            op_lib = tf.load_op_library('/directory/to/foo_bar.so')
            # eval() below needs a default session, hence the with-block.
            with tf.Session():
                output = op_lib.foo_bar([1, 2, 3])
                self.assertListEqual(output.eval().tolist(), [1, 2, 3])

    suite = unittest.TestSuite()
    for case_name in ['testFooBar']:
        suite.addTest(FooBarTest(case_name))

    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(suite)


if __name__ == '__main__':
    test()

Enjoy the hacking!

Tuesday, September 20, 2016

Commands for setting wireless hosted network on Windows 10

Sometimes it's convenient to set up a wireless hosted network on Windows, especially when no soft AP is available. When living in or travelling to countries with Internet access restrictions, it's convenient to set up a VPN connection and then set up a hosted network to share the VPN's Internet connection.

Following are commands to ease such a task.

First is to create the hosted network:

netsh wlan set hostednetwork mode=allow ssid=Your-SSID key="Your-Password"
netsh wlan start hostednetwork

Then do the sharing:

One can inspect the status of the hosted network via the following command:
netsh wlan show hostednetwork
or
netsh wlan show hostednetwork setting=security

If, after some time, the hosted network is no longer needed, the following command stops it:
netsh wlan stop hostednetwork

If one wants to change the password, for instance because the original one has been forgotten, use the following commands:
netsh wlan set hostednetwork key="Your-New-Password"
netsh wlan refresh hostednetwork key

The SSID can be changed as well:
netsh wlan set hostednetwork ssid="Your-New-SSID"

Happy networking, happy surfing!

Monday, September 19, 2016

How to use TensorFlow on Windows via Docker

Docker support on Windows comes in two flavors. Native support requires certain versions of Windows and is still maturing. I admit Hyper-V is a nice thing from the native Windows perspective; however, it is too greedy for memory. So I will just focus on Docker Toolbox for Windows.

Before we begin, let's have a look at how Docker works on Windows, referring to the following figure[1]:

Generally speaking, the Docker host is contained in an image called boot2docker, which runs in a virtual machine (here it's called "default" and runs in VirtualBox). The Docker daemon and all containers run within this host.
So when using the docker-machine command, the machine-name parameter is generally default, for example:
docker-machine start default
docker-machine stop default

After clicking "Docker Quickstart Terminal" and waiting a while for preparation (it creates the default virtual machine in VirtualBox on the first run, or brings it back to life if it already exists, then starts the daemon and communicates with it):

The host can be accessed by the pre-configured ssh setting:

Just typing ssh docker@127.0.0.1 -p 1939. The password is tcuser.

Here we can inspect some interesting settings.
1. The docker toolbox will create a host-only network interface for the "default" running instance:

It will be functioning at least to layer 3, so it gets an IP address, usually it's 192.168.99.1 from outside.
2. When you inspect inside from docker, it's quite interesting:

I know eth1 corresponds to the newly created host-only network. To my understanding, the host-only network interface can be shared by many instances, so each should have its own specific IP address; here it's 192.168.99.100.

Notice docker_gwbridge: as its name reflects, it's a bridge built on top of eth1. I guess it gets the IP address range 172.18.0.0/16 just for flexibility. There's potential for more such bridges to exist.

Another interface docker0 is actually connected to this bridge. If you run more containers, one can guess that the virtual interface is also connected to docker_gwbridge, with IP addresses from 172.17.0.0/16.

Once such networking topology is made clear, it can ease some confusing problems.

For example, it's quite common to run a container first and only then realize that exposing some port for external access is unavoidable. For example, if we just run the following command:
docker run --name tensorflow -it gcr.io/tensorflow/tensorflow:latest-devel
we will find later we cannot access TensorBoard, which is listening on the following address: 0.0.0.0:6006.

So one way is to remove the existing tensorflow container, and explicitly publish the port-mapping on creating:
docker rm tensorflow
docker run -p 6006:6006 --name tensorflow -it gcr.io/tensorflow/tensorflow:latest-devel

Another way is to enter the Docker host and manipulate iptables (I guess it works, but I haven't tried):

The commands for working with an ordinary container are simple, for example, stopping and starting a container:

docker stop tensorflow

docker start -ai tensorflow

In the above command, the -a and -i options attach the container's stdout/stderr and stdin to the current terminal, which is convenient for interactive work.

Once a tensorflow Docker instance is started, what follows is just happy hacking!

Thursday, June 30, 2016

Haar wavelet with Matlab implementation

The Haar wavelet is among the simplest wavelets, and it has applications in various fields such as signal processing and image processing.

The following is just an implementation of the Haar wavelet as a Matlab script. The key is not the implementation, but some insight into wavelets:
(1) Taking image processing as an example, the most exact image is the original image, so for processing such as compression, the key question is to what extent one can tolerate an increase in entropy. That is, supposing the image I belongs to space V(J), you can only work with the image by spreading its entropy among the spaces V(j) with j < J.
(2) When comparing data in Matlab, always pay attention to the scale. I didn't pay much attention to notation like "1.0e-15 *", so originally I thought there was a mistake in my code; later I realized everything was correct.

The script is as follows:

function wavelet_2d_forward_and_inverse(img_path)
% WAVELET_2D_FORWARD_AND_INVERSE  One-level 2-D Haar transform and its inverse.
%   wavelet_2d_forward_and_inverse(IMG_PATH) reads the JPEG image IMG_PATH,
%   converts it to a double-precision grey-scale image and shows three
%   figures: the original image, its one-level Haar decomposition (the
%   approximation block and the three detail blocks tiled as
%   [c d1; d2 d3]) and the image reconstructed from those four blocks.
%
%   Only square images with an even side length are supported; for any other
%   size a message is displayed and the function returns.

img = imread(img_path, 'jpg');
% rgb2gray only accepts true-colour input; a grey-scale JPEG is already 2-D.
if size(img, 3) == 3
    img = rgb2gray(img);
end
img = im2double(img);

figure;
imshow(img);
title('Original image');

[m, n] = size(img);

if m ~= n
    disp('only squared image supported');
    return;
end

% Each filter row combines one pair of neighbouring samples, so the side
% length has to be even (zeros(n / 2, n) would fail otherwise).
if mod(n, 2) ~= 0
    disp('only even-sized image supported');
    return;
end

coeff = 1 / 2 ^ 0.5;

% row approximation (H) and detail (G) matrices: row i has the pair
% [coeff, +/-coeff] in columns 2i-1 and 2i, zeros elsewhere
HR = kron(eye(n / 2), [coeff,  coeff]);
GR = kron(eye(n / 2), [coeff, -coeff]);

% column approximation and detail matrices, built the same way
HC = kron(eye(m / 2), [coeff,  coeff]);
GC = kron(eye(m / 2), [coeff, -coeff]);

% calculate the approximation and detail parts respectively
img_c = HC * img * HR';
img_d1 = GC * img * HR';
img_d2 = HC * img * GR';
img_d3 = GC * img * GR';

img_W = [img_c img_d1; img_d2 img_d3];

img2 = mat2gray(img_W);
figure;
imshow(img2);
title('Forward Haar transform');

% inverse transformation: apply the transposed (adjoint) filters on the
% left and the original filters on the right, then sum the four parts
img_orig = HC' * img_c * HR + ...
           GC' * img_d1 * HR + ...
           HC' * img_d2 * GR + ...
           GC' * img_d3 * GR;

img3 = mat2gray(img_orig);
figure;
imshow(img3);
title('Inverse Haar transform');

The images are as follows: