Deep Learning Pearls: May 2017

Saturday, May 27, 2017

How to write a word recite program via TensorFlow and Sonnet

Obviously, if you give a network a word like "congratulations", it will learn the correlation and generate "congratulations". With an RNN, there should be a lag of one or several steps. For a one-step lag, this means that if you input "congratulations[ ]", it will generate "[ ]congratulations", where "[ ]" denotes a space character. I had written a MATLAB script to play with this; however, when trying it with Sonnet, to my surprise, the speed was astonishing. Thanks to Google for always shipping great tools.

The snippet follows; feel free to tweak it, and good luck:

import tensorflow as tf
import sonnet as snt

# Depth of each one-hot vector: MyOneHotData maps 'a'..'z' to indices 1..26
# (ord(c) - 96) and uses index 0 for the padding steps it appends.
label_size = 27
# Number of units handed to snt.LSTM in MyRNN and the input width of MySoftmax.
hidden_size = 128
# Batch size used when MyRNN builds the LSTM initial state.
batch_size = 1

class MyOneHotData(snt.AbstractModule):
    '''
    Sonnet module that one-hot encodes a string, one row per character.

    Each character c is mapped to the index ord(c) - 96, so 'a'..'z' become
    1..26. Index 0 is used for the optional padding steps that can be added
    in front of or behind the word.
    '''
    def __init__(self, depth=label_size, on_value=1.0, off_value=0.0, name='my_one_hot_data'):
        '''
        Constructor
        Args:
            depth: size of each one-hot vector
            on_value: value written at the active index
            off_value: value written everywhere else
            name: Sonnet module name
        '''
        super(MyOneHotData, self).__init__(name=name)
        self._on_value = on_value
        self._off_value = off_value
        # Use the depth argument rather than the module-level label_size so
        # that a custom depth actually takes effect.
        self._depth = depth

    def _build(self, inputs, axis=-1, append_head=None, append_tail=None):
        '''
        Encode a string as a float32 one-hot tensor.
        Args:
            inputs: string to encode, one time step per character
            axis: axis that receives the one-hot dimension (passed to tf.one_hot)
            append_head: number of padding steps (index 0) to put before the word
            append_tail: number of padding steps (index 0) to put after the word
        Returns:
            the result of tf.one_hot over the (padded) index list
        '''
        indices = [(ord(c) - 96) for c in inputs]
        if append_head:
            indices = [0] * append_head + indices
        if append_tail:
            indices = indices + [0] * append_tail

        return tf.one_hot(indices, self._depth, self._on_value, self._off_value, axis, tf.float32)

class MySoftmax(snt.AbstractModule):
    '''
    Per-time-step affine projection from hidden activations to label logits.

    Despite the name, no softmax is applied here: the module returns
    matmul(inputs, w) + b for every step, sharing w and b across steps.
    '''
    def __init__(self, hidden_size = hidden_size, label_size = label_size, name = "my_softmax"):
        '''
        Constructor
        Args:
            hidden_size: width of the incoming activations (rows of w)
            label_size: number of output labels (columns of w, length of b)
            name: Sonnet module name
        '''
        super(MySoftmax, self).__init__(name = name)
        self._label_size = label_size
        self._hidden_size = hidden_size

    @snt.experimental.reuse_vars
    def _trans(self, inputs):
        '''
        Project one time step; reuse_vars makes every call share "w" and "b".
        '''
        weights = tf.get_variable("w", shape = [self._hidden_size, self._label_size])
        bias = tf.get_variable("b", shape = [self._label_size])
        projected = tf.matmul(inputs, weights)
        return projected + bias

    def _build(self, inputs):
        '''
        Unstack inputs along the first axis, project each slice, and restack.
        '''
        per_step_logits = []
        for step_activations in tf.unstack(inputs):
            per_step_logits.append(self._trans(step_activations))
        return tf.stack(per_step_logits)


class MyRNN(snt.AbstractModule):
    '''
    Single-layer LSTM unrolled over a time-major input with tf.nn.dynamic_rnn.
    '''
    def __init__(self, batch_size = batch_size, hidden_size = hidden_size, name = "my_rnn"):
        '''
        Constructor
        Args:
            batch_size: batch size used to build the LSTM initial state
            hidden_size: number of LSTM units
            name: Sonnet module name
        '''
        super(MyRNN, self).__init__(name = name)
        self._hidden_size = hidden_size
        self._batch_size = batch_size

    def _build(self, inputs):
        '''
        Run the LSTM over inputs and return the output sequence only;
        the final state is discarded.
        '''
        cell = snt.LSTM(self._hidden_size)
        start_state = cell.initial_state(self._batch_size)
        unrolled = tf.nn.dynamic_rnn(cell, inputs,
                                     initial_state = start_state,
                                     time_major = True)
        # dynamic_rnn returns (outputs, final_state); only the outputs are needed.
        return unrolled[0]

class MyWord(snt.AbstractModule):
    '''
    Turns per-step label scores back into character codes.

    The arg-max index of every step is decoded as: 0 -> 32 (space),
    anything else -> index + 96 (so 1..26 become 'a'..'z').
    '''
    def __init__(self, label_size = label_size, name = "my_word"):
        '''
        Constructor
        Args:
            label_size: number of labels (stored, not used by _build)
            name: Sonnet module name
        '''
        super(MyWord, self).__init__(name = name)
        self._label_size = label_size

    def _build(self, inputs):
        '''
        Return a Python list with one scalar int64 tensor (a character code)
        per row of inputs. The first dimension of inputs must be statically
        known because it drives the Python-level loop.
        '''
        indices = tf.argmax(inputs, 1)
        step_count = indices.get_shape().as_list()[0]

        decoded = []
        for pos in range(step_count):
            is_padding = tf.equal(indices[pos], 0)
            # tf.cond invokes both callables right away while building the
            # graph, so capturing the loop variable in the lambda is safe.
            decoded.append(tf.cond(is_padding,
                                   lambda: tf.constant(32, tf.int64),
                                   lambda: indices[pos] + 96))
        return decoded

with tf.Session() as sess:
    # Word the network learns to recite. Defined once so that the input and
    # the target are guaranteed to be built from the same string.
    word = "congratulations"

    my_one_hot_data = MyOneHotData()

    # Input: the word followed by one padding step.
    encoded_input = my_one_hot_data(word, append_tail=1)
    # Insert a batch dimension at axis 1, giving a time-major layout for MyRNN.
    input_with_batch_dim = tf.expand_dims(encoded_input, axis=1)

    my_rnn = MyRNN()
    outputs = my_rnn(input_with_batch_dim)

    my_softmax = MySoftmax()
    label_pred_with_batch = my_softmax(outputs)

    # Drop the batch dimension again so predictions line up with the labels.
    label_pred = tf.squeeze(label_pred_with_batch, axis=1)

    # Target: one padding step followed by the word, i.e. the input delayed
    # by one step.
    encoded_label = my_one_hot_data(word, append_head=1)

    loss = tf.nn.softmax_cross_entropy_with_logits(labels=encoded_label, logits=label_pred)

    graph_regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_regularization_loss = tf.reduce_sum(graph_regularizers)

    total_loss = tf.reduce_mean(loss) + total_regularization_loss

    train_op = tf.train.GradientDescentOptimizer(0.05).minimize(total_loss)

    my_word = MyWord()
    chars = my_word(label_pred)

    tf.summary.scalar("model-loss", total_loss)
    summ_op = tf.summary.merge_all()

    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

    writer = tf.summary.FileWriter("char_pred_train", sess.graph)
    try:
        for i in range(1000):
            _, summaries = sess.run([train_op, summ_op])
            # Uncomment to log the loss for TensorBoard at every step.
            #writer.add_summary(summaries, global_step = i)

            # Decode and print the current prediction so progress is visible.
            sole_chars = sess.run(chars)
            print(''.join([chr(c) for c in sole_chars]))
    finally:
        # Close the event file even if training is interrupted.
        writer.close()

Saturday, May 20, 2017

How to prepare tfrecords utilizing TensorFlow for training models (II)

The following code snippet retrieves records from the tfrecords files prepared in the previous post. I hope it helps relieve some of the difficulties beginners face.

'''
@author: Yurui Ming (yrming@gmail.com)
'''
import tensorflow as tf
import os
import skimage.io as io

class TFRecordPumper(object):
    '''
    Reads shuffled batches of (image, label) records back from tfrecords files.

    The pumper works on a graph and a session that are either supplied by the
    caller or created here. It can be used as a context manager; leaving the
    context stops the queue-runner threads started by Pump and closes the
    session if the pumper created it.
    '''

    def __init__(self, graph=None, sess=None):
        '''
        Constructor
        Args:
            graph: graph to build the input pipeline in; a new tf.Graph is
                created when omitted
            sess: session to run the pipeline in; when omitted a new
                tf.Session on the graph is created and later closed by __exit__
        '''
        self._graph = tf.Graph() if graph is None else graph

        if sess is None:
            self._sess = tf.Session(graph=self._graph)
            self._self_sess = True
        else:
            self._sess = sess
            self._self_sess = False

        # Filled in by Pump once the queue runners are started. Initialised
        # here so that __exit__ is safe even when Pump was never consumed.
        self._coord = None
        self._threads = []

    def __enter__(self):
        '''
        Enter the context; returns the pumper itself.
        '''
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        '''
        Stop the queue-runner threads and close the session if it is owned.

        The three arguments follow the context-manager protocol and default
        to None so the method can also be called directly without arguments.
        Exceptions raised inside the with-block are not suppressed.
        '''
        if self._coord is not None:
            self._coord.request_stop()
            self._coord.join(self._threads)
            self._coord = None
            self._threads = []

        if self._self_sess:
            self._sess.close()

    def Pump(self, tfr_dir, tfr_basename, batch_size=2, features=None, img_shape=None,
             capacity=10, num_threads=1, min_after_dequeue=5):
        '''
        Pump
            pumping out tfrecords
            This is a generator that yields exactly one batch: a list
            [images, labels] as returned by Session.run.
        Args:
            tfr_dir: directory contains tfrecords file
            tfr_basename: basename pattern for collecting tfrecords files; files
                matching <tfr_basename>*.tfrecords are read. If it contains
                'train', 'xval' or 'test', the record keys are expected to be
                prefixed accordingly (e.g. 'train/label', 'train/image')
            batch_size: batch number of tfrecords to pump each time
            features: features describing tfrecords; must contain the label
                and image keys selected by tfr_basename. When omitted, a
                scalar int64 label and a scalar string image are assumed
            img_shape: shape each decoded image is reshaped to
                NOTE(review): shuffle_batch presumably needs statically known
                shapes, so this is likely required in practice -- confirm
            capacity: capacity of the shuffle queue
            num_threads: number of threads enqueuing records
            min_after_dequeue: minimum queue size kept for shuffling
        Raises:
            ValueError: features is given but lacks the required keys
        '''
        # Records written for a specific split carry a '<split>/' key prefix.
        for split in ('train', 'xval', 'test'):
            if split in tfr_basename:
                label_key = split + '/label'
                image_key = split + '/image'
                break
        else:
            label_key = 'label'
            image_key = 'image'

        if features is None:
            features = {
                label_key: tf.FixedLenFeature([], tf.int64),
                image_key: tf.FixedLenFeature([], tf.string),
            }
        elif label_key not in features or image_key not in features:
            raise ValueError('features must describe both {!r} and {!r}'.format(label_key, image_key))

        with self._graph.as_default():
            ptn = os.path.join(tfr_dir, tfr_basename + "*.tfrecords")

            filenames = tf.train.match_filenames_once(ptn)
            tf_record_filename_queue = tf.train.string_input_producer(filenames)

            # TFRecordReader hands out one serialized example at a time, also
            # for files that hold more than one example.
            tf_record_reader = tf.TFRecordReader()
            _, tf_record_serialized = tf_record_reader.read(tf_record_filename_queue)

            tf_record_features = tf.parse_single_example(tf_record_serialized, features=features)

            # The image was stored as raw bytes; decode it as uint8 because
            # all of the channel information is between 0-255.
            tf_record_image = tf.reshape(tf_record_features[image_key], [])
            tf_record_image = tf.decode_raw(tf_record_image, tf.uint8)

            # Restore the layout the image had when it was saved.
            if img_shape:
                tf_record_image = tf.reshape(tf_record_image, img_shape)

            tf_record_label = tf_record_features[label_key]

            images, labels = tf.train.shuffle_batch([tf_record_image, tf_record_label],
                                                    batch_size=batch_size,
                                                    capacity=capacity,
                                                    min_after_dequeue=min_after_dequeue,
                                                    num_threads=num_threads)

            # The local initializer is included as well as the global one.
            init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        self._sess.run(init_op)

        self._coord = tf.train.Coordinator()
        self._threads = tf.train.start_queue_runners(sess=self._sess, coord=self._coord)

        # Fetch the batch before yielding so the generator does not stay
        # suspended in the middle of a session call.
        batch = self._sess.run([images, labels])
        yield batch


if __name__ == '__main__':
    # Pull a single batch of cross-validation records and display every image.
    # (Pass 'train' instead of 'xval' to read the training records.)
    pumper = TFRecordPumper()
    batch_source = pumper.Pump('', 'xval', img_shape = [64, 64, 3])
    images, labels = next(batch_source)

    for picture in images:
        io.imshow(picture)

        io.show()

How to prepare tfrecords utilizing TensorFlow for training models

The merit of using tfrecords is clear: high-throughput feeding keeps the training iterations from starving. A precondition is that the tfrecords must be prepared before launching the whole process. The general guidelines are easy to understand; however, example code is scattered here and there, so it is not easy to assemble the snippets into something that actually works. The following code was written with that aim in mind, and I hope it is useful for everybody's deep learning work. By the way, do not hesitate to provide feedback on improving the code quality.

'''
@author: Yurui Ming (yrming@gmail.com)
'''
import numpy as np
import tensorflow as tf
import os

class TFRecordGenerator(object):
    '''
    Converts a directory of cat/dog images into train/xval/test tfrecords files.
    '''
    def __init__(self, params = None):
        '''
        Constructor
        Args:
            params: accepted for interface compatibility; not used
        '''
        self._graph = tf.Graph()

    def _int64_feature(self, value):
        '''
        Wrap a single integer in a tf.train.Feature.
        '''
        return tf.train.Feature(int64_list = tf.train.Int64List(value = [value]))

    def _bytes_feature(self, value):
        '''
        Wrap a single bytes value in a tf.train.Feature.
        '''
        return tf.train.Feature(bytes_list = tf.train.BytesList(value = [value]))

    @staticmethod
    def _label_from_filename(image_label):
        '''
        Derive the class label from an image path given as bytes.

        Only the file name is inspected, so a directory called e.g. "cats"
        cannot mislabel the images stored inside it.
        Returns:
            0 for a name containing b'cat', 1 for a name containing b'dog'
        Raises:
            ValueError: the name contains neither
        '''
        name = os.path.basename(image_label)
        if b'cat' in name:
            return 0
        if b'dog' in name:
            return 1
        raise ValueError('Invalid file name: {}'.format(image_label))

    def _write_partition(self, sess, image_key, image_data, count, base_name, prefix, split_unit):
        '''
        Read count images from the pipeline and write them as tfrecords.

        A new file named base_name.format(i) is started every split_unit
        records, and each record stores '<prefix>/label' and '<prefix>/image'.
        Nothing is written (and no file is created) when count is 0.
        '''
        writer = None
        try:
            for i in range(count):
                if i % split_unit == 0:
                    if writer is not None:
                        writer.close()
                    writer = tf.python_io.TFRecordWriter(base_name.format(i))

                image_label, image_cont = sess.run([image_key, image_data])
                label = self._label_from_filename(image_label)

                feature = {
                    prefix + '/label': self._int64_feature(label),
                    prefix + '/image': self._bytes_feature(image_cont.tobytes()),
                }
                example = tf.train.Example(features = tf.train.Features(feature = feature))
                writer.write(example.SerializeToString())
        finally:
            # Also reached when a file name is invalid, so the current file
            # is always flushed and closed.
            if writer is not None:
                writer.close()

    def Generate(self, img_dir, img_fmt = None, img_shape = [64, 64], partition = [0.8, 0.1, 0.1],
                 train_tfrecord_base_name = 'train{}.tfrecords',
                 xval_tfrecord_base_name = 'xval{}.tfrecords',
                 test_tfrecord_base_name = 'test{}.tfrecords',
                 split_unit = 500):
        '''
        Generate
            Generate TFRecord files
            Three categories of TFRecord files will be generated, namely, training category, cross-validating category and testing category
        Args:
            img_dir: directory containing the images. The label is decided from the file name (it must contain 'cat' or 'dog')
            img_fmt: image encoding standard; must contain 'jpg' or 'png'
            img_shape: [height, width] every image is resized to after being cropped to a square; a false value skips cropping and resizing
            partition: portions of percentage of each category, namely, training, cross-validating and testing
            train_tfrecord_base_name: base training tfrecord file name paradigm for generating training tfrecord file name
            xval_tfrecord_base_name: base cross-validating tfrecord file name paradigm for generating cross-validating tfrecord file name
            test_tfrecord_base_name: base testing tfrecord file name paradigm for generating testing tfrecord file name
            split_unit: number of accumulated tfrecords in each tfrecord file
        Raises:
            ValueError: missing or unsupported image format, invalid partition, invalid split unit, or a file name without 'cat'/'dog'
        '''
        # Validate everything up front, before any graph, session or thread
        # is created.
        if not img_fmt:
            raise ValueError('Unspecified image format')

        is_png = 'png' in img_fmt
        is_jpg = 'jpg' in img_fmt
        if not (is_png or is_jpg):
            raise ValueError('Unsupported image format')

        # Allow for floating-point rounding when the portions add up to 1.
        if len(partition) < 3 or sum(partition) > 1 + 1e-9:
            raise ValueError('Invalid partition')

        if split_unit < 1:
            raise ValueError('Invalid split unit')

        with self._graph.as_default():
            # 'png' takes precedence when img_fmt mentions both formats.
            ptn = os.path.join(img_dir, '*.png' if is_png else '*.jpg')

            filenames = tf.train.match_filenames_once(ptn)
            filename_queue = tf.train.string_input_producer(filenames)
            image_reader = tf.WholeFileReader()
            # image_key is the file name, image_file the raw file content.
            image_key, image_file = image_reader.read(filename_queue)

            if is_png:
                image_data = tf.image.decode_png(image_file)
            else:
                image_data = tf.image.decode_jpeg(image_file)

            image_data_shape = tf.shape(image_data)

            if img_shape:
                # Crop to a square whose side is the shorter dimension, then
                # scale to the requested size.
                image_data = tf.cond(image_data_shape[0] > image_data_shape[1],
                                     lambda: tf.image.resize_image_with_crop_or_pad(image_data, image_data_shape[1], image_data_shape[1]),
                                     lambda: tf.image.resize_image_with_crop_or_pad(image_data, image_data_shape[0], image_data_shape[0]))

                image_data = tf.image.resize_images(image_data, img_shape)

            # Cast to uint8 so the pixels are stored one byte each.
            image_data = tf.cast(image_data, tf.uint8)

            init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

            with tf.Session() as sess:
                sess.run(init)

                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess = sess, coord = coord)

                try:
                    num_files = len(sess.run(filenames))
                    counts = [int(v * num_files) for v in partition[:3]]

                    self._write_partition(sess, image_key, image_data, counts[0],
                                          train_tfrecord_base_name, 'train', split_unit)
                    self._write_partition(sess, image_key, image_data, counts[1],
                                          xval_tfrecord_base_name, 'xval', split_unit)
                    self._write_partition(sess, image_key, image_data, counts[2],
                                          test_tfrecord_base_name, 'test', split_unit)
                finally:
                    # Always stop the reader threads, even after an error.
                    coord.request_stop()
                    coord.join(threads)

if __name__ == '__main__':
    # Convert the JPEG images of the sample directory using the default
    # output size, partition and file names.
    source_dir = 'C:\\Users\\MSUser\\Downloads\\mytest'
    generator = TFRecordGenerator()
    generator.Generate(source_dir, 'jpg')