Saturday, May 27, 2017

How to write a word recite program via TensorFlow and Sonnet

Obviously, if you give a network a word like "congratulations", it will learn the correlation and generate "congratulations". When an RNN is considered, there should be a lag of one or several steps. With a one-step lag, this means that if you input "congratulations[ ]", it will generate "[ ]congratulations", where "[ ]" denotes a space character. I had written a Matlab script for playing with this; however, when playing with Sonnet, to my surprise the speed was astonishing. Thank Google for always shipping great tools.

Following is the snippet; feel free to tweak it, and good luck:

import tensorflow as tf
import sonnet as snt

# Number of classes per character: indices 1..26 come from ord(c) - 96 (lowercase
# letters, presumably a-z only) and index 0 is the padding symbol printed as a space.
label_size = 27
# Size of the LSTM hidden state; also the row count of the output projection matrix.
hidden_size = 128
# Batch size used when creating the LSTM initial state.
batch_size = 1

class MyOneHotData(snt.AbstractModule):
    '''
    Sonnet module turning a string into a sequence of one-hot vectors.

    Every character c is mapped to the index ord(c) - 96, so 'a' becomes 1, 'b'
    becomes 2 and so on; index 0 is reserved for the padding positions added by
    append_head / append_tail.
    NOTE(review): characters outside a-z produce indices outside 1..26 and are
    not validated here - confirm that callers only pass lowercase words.
    '''
    def __init__(self, depth = label_size, on_value = 1.0, off_value = 0.0, name = 'my_one_hot_data'):
        '''
        Constructor
        Args:
            depth: length of each one-hot vector
            on_value: value written at the position of the character index
            off_value: value written at all other positions
            name: name of the Sonnet module
        '''
        super(MyOneHotData, self).__init__(name = name)
        self._on_value = on_value
        self._off_value = off_value
        # Honour the constructor argument; it used to be overwritten by the global label_size
        self._depth = depth

    def _build(self, inputs, axis = -1, append_head = None, append_tail = None):
        '''
        Encode a string
        Args:
            inputs: string to encode
            axis: axis of the one-hot dimension, forwarded to tf.one_hot
            append_head: number of padding (index 0) steps to put in front of the word
            append_tail: number of padding (index 0) steps to put behind the word
        Returns:
            float32 tensor produced by tf.one_hot, one entry per (padded) character
        '''
        indices = [(ord(c) - 96) for c in inputs]
        if append_head:
            indices = [0] * append_head + indices
        if append_tail:
            indices = indices + [0] * append_tail

        return tf.one_hot(indices, self._depth, self._on_value, self._off_value, axis, tf.float32)

class MySoftmax(snt.AbstractModule):
    '''
    Output layer projecting every time step of the RNN output onto the label space.

    Only the affine map inputs * w + b is computed here; no softmax op is applied
    inside this module, so the result is a tensor of unnormalised scores.
    '''
    def __init__(self, hidden_size = hidden_size, label_size = label_size, name = "my_softmax"):
        '''
        Constructor
        Args:
            hidden_size: number of rows of the weight matrix
            label_size: number of columns of the weight matrix and length of the bias
            name: name of the Sonnet module
        '''
        super(MySoftmax, self).__init__(name = name)
        self._label_size = label_size
        self._hidden_size = hidden_size

    @snt.experimental.reuse_vars
    def _trans(self, inputs):
        '''Apply the shared affine transform to the input of one time step.'''
        # reuse_vars makes repeated calls share the variables "w" and "b"
        weights = tf.get_variable("w", shape = [self._hidden_size, self._label_size])
        bias = tf.get_variable("b", shape = [self._label_size])
        projected = tf.matmul(inputs, weights)
        return projected + bias

    def _build(self, inputs):
        '''
        Project each slice along the first axis of inputs and stack the results again.
        '''
        per_step_scores = []
        for step_input in tf.unstack(inputs):
            per_step_scores.append(self._trans(step_input))
        return tf.stack(per_step_scores)
        

class MyRNN(snt.AbstractModule):
    '''
    Single-layer LSTM unrolled over a time-major input with tf.nn.dynamic_rnn.
    '''
    def __init__(self, batch_size = batch_size, hidden_size = hidden_size, name = "my_rnn"):
        '''
        Constructor
        Args:
            batch_size: batch size used for building the initial LSTM state
            hidden_size: hidden size of the LSTM core
            name: name of the Sonnet module
        '''
        super(MyRNN, self).__init__(name = name)
        self._hidden_size = hidden_size
        self._batch_size = batch_size

    def _build(self, inputs):
        '''
        Run the LSTM over inputs (time is the leading axis, see time_major) and
        return the output sequence only; the final state is discarded.
        '''
        cell = snt.LSTM(self._hidden_size)
        zero_state = cell.initial_state(self._batch_size)
        rnn_result = tf.nn.dynamic_rnn(cell, inputs, initial_state = zero_state, time_major = True)
        return rnn_result[0]

class MyWord(snt.AbstractModule):
    '''
    Decodes per-step scores back into character codes.
    '''
    def __init__(self, label_size = label_size, name = "my_word"):
        '''
        Constructor
        Args:
            label_size: number of labels; stored but not read by _build
            name: name of the Sonnet module
        '''
        super(MyWord, self).__init__(name = name)
        self._label_size = label_size

    def _build(self, inputs):
        '''
        Take the arg-max label of every row of inputs and convert it to a character
        code: label 0 becomes 32 (the ASCII space), any other label becomes
        label + 96 (so 1 maps to the code of 'a').
        Returns:
            list of scalar int64 tensors, one per row of inputs
        '''
        best_labels = tf.argmax(inputs, 1)
        num_steps = best_labels.get_shape().as_list()[0]

        char_codes = []
        for step in range(num_steps):
            # Both branch callables are invoked by tf.cond right away, i.e. while
            # step still holds the current value
            code = tf.cond(tf.equal(best_labels[step], 0),
                           lambda: tf.constant(32, tf.int64),
                           lambda: best_labels[step] + 96)
            char_codes.append(code)
        return char_codes

# Build the graph, train for 1000 steps and print the word recited after every step.
with tf.Session() as sess:
    # Single definition of the word to recite, shared by the input and the label
    # (it used to be duplicated and misspelled as "congradulations")
    word = "congratulations"

    my_one_hot_data = MyOneHotData()

    # Input: the word followed by one padding step
    encoded_input = my_one_hot_data(word, append_tail = 1)
    # Insert the batch axis behind the time axis (the RNN is run time-major)
    input_with_batch_dim = tf.expand_dims(encoded_input, axis = 1)

    my_rnn = MyRNN()
    outputs = my_rnn(input_with_batch_dim)

    my_softmax = MySoftmax()
    label_pred_with_batch = my_softmax(outputs)

    # Drop the batch axis again
    label_pred = tf.squeeze(label_pred_with_batch, axis = 1)

    # Label: the same word delayed by one step, i.e. one padding step in front
    encoded_label = my_one_hot_data(word, append_head = 1)

    loss = tf.nn.softmax_cross_entropy_with_logits(labels = encoded_label, logits = label_pred)

    graph_regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_regularization_loss = tf.reduce_sum(graph_regularizers)

    total_loss = tf.reduce_mean(loss) + total_regularization_loss

    train_op = tf.train.GradientDescentOptimizer(0.05).minimize(total_loss)

    my_word = MyWord()
    chars = my_word(label_pred)

    tf.summary.scalar("model-loss", total_loss)
    summ_op = tf.summary.merge_all()

    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

    writer = tf.summary.FileWriter("char_pred_train", sess.graph)

    # try/finally makes sure the event file is closed even if a training step fails
    try:
        for i in range(1000):
            _, summaries = sess.run([train_op, summ_op])
            # Enable the next line to log the loss of every step for TensorBoard
            #writer.add_summary(summaries, global_step = i)
            sole_chars = sess.run(chars)

            print(''.join([chr(c) for c in sole_chars]))
    finally:
        writer.close()

Saturday, May 20, 2017

How to prepare tfrecords utilizing TensorFlow for training models (II)

The following code snippet is the corresponding code for retrieving the records from the tfrecords files prepared in the previous post. Hope it's useful for relieving some difficulties for beginners.

'''
@author: Yurui Ming (yrming@gmail.com)
'''
import tensorflow as tf
import os
import skimage.io as io

class TFRecordPumper(object):
    '''
    Reads serialized examples back from tfrecords files and pumps them out in
    shuffled batches.

    The object can be used as a context manager; leaving the context stops the
    queue runner threads started by Pump and closes the session if it was
    created by this object.
    '''

    def __init__(self, graph = None, sess = None):
        '''
        Constructor
        Args:
            graph: graph to build the input pipeline in; a new tf.Graph is created when omitted
            sess: session to run the pipeline in; when omitted, a session bound to the
                graph is created and owned (i.e. closed on exit) by this object
        '''
        self._graph = tf.Graph() if graph is None else graph

        if sess is None:
            self._sess = tf.Session(graph = self._graph)
            self._self_sess = True
        else:
            self._sess = sess
            self._self_sess = False

        # Filled in by Pump(); initialised here so that __exit__ is safe even if
        # Pump() was never run
        self._coord = None
        self._threads = []

    def __enter__(self):
        return self

    def __exit__(self, exc_type = None, exc_value = None, traceback = None):
        '''
        Stop the queue runner threads and close the session if this object owns it.
        The arguments are optional so that the former direct call __exit__() keeps working.
        '''
        if self._coord is not None:
            self._coord.request_stop()
            self._coord.join(self._threads)
            self._coord = None
            self._threads = []

        if self._self_sess:
            self._sess.close()

    def Pump(self, tfr_dir, tfr_basename, batch_size = 2, features = None, img_shape = None,
             capacity = 10, num_threads = 1, min_after_dequeue = 5):
        '''
        Pump
        pumping out tfrecords
        This is a generator which yields exactly one [images, labels] batch; every
        call builds a new input pipeline in the graph.
        Args:
            tfr_dir: directory contains tfrecords file
            tfr_basename: basename pattern for collecting tfrecords files; it also selects
                the feature keys ('train/...', 'xval/...', 'test/...' or the bare 'label' / 'image')
            batch_size: batch number of tfrecords to pump each time
            features: features describing tfrecords; when omitted, a scalar int64 label
                and a scalar string image under the selected keys are assumed
            img_shape: shape to reshape every decoded image to; no reshaping when omitted
            capacity: capacity of the shuffling queue
            num_threads: number of threads enqueuing examples
            min_after_dequeue: minimum number of elements kept in the shuffling queue
        Raises:
            ValueError: if features is given but lacks the selected label or image key
        '''
        with self._graph.as_default():
            ptn = os.path.join(tfr_dir, tfr_basename + "*.tfrecords")

            filenames = tf.train.match_filenames_once(ptn)

            tf_record_filename_queue = tf.train.string_input_producer(filenames)

            # Notice the different record reader, this one is designed to work with TFRecord files which may
            # have more than one example in them.
            tf_record_reader = tf.TFRecordReader()
            _, tf_record_serialized = tf_record_reader.read(tf_record_filename_queue)

            # The feature keys carry the category prefix used when the files were written
            if 'train' in tfr_basename:
                label_key = 'train/label'
                image_key = 'train/image'
            elif 'xval' in tfr_basename:
                label_key = 'xval/label'
                image_key = 'xval/image'
            elif 'test' in tfr_basename:
                label_key = 'test/label'
                image_key = 'test/image'
            else:
                label_key = 'label'
                image_key = 'image'

            # The label and image are stored as bytes but could be stored as int64 or float64 values in
            # serialized tf.Example protobuf.
            if features is None:
                # assume the most general feature if none provided
                features = {label_key: tf.FixedLenFeature([], tf.int64),
                            image_key: tf.FixedLenFeature([], tf.string),
                            }
            elif label_key not in features or image_key not in features:
                raise ValueError('features must describe {} and {}'.format(label_key, image_key))

            tf_record_features = tf.parse_single_example(tf_record_serialized, features = features)

            tf_record_image = tf.reshape(tf_record_features[image_key], [])

            # Using tf.uint8 because all of the channel information is between 0-255
            tf_record_image = tf.decode_raw(tf_record_image, tf.uint8)

            # Reshape the image to look like the image saved; real values for the height,
            # width and channels of the image have to be used for this
            if img_shape:
                tf_record_image = tf.reshape(tf_record_image, img_shape)

            tf_record_label = tf_record_features[label_key]

            images, labels = tf.train.shuffle_batch([tf_record_image, tf_record_label],
                                                    batch_size = batch_size,
                                                    capacity = capacity,
                                                    min_after_dequeue = min_after_dequeue,
                                                    num_threads = num_threads)

            # match_filenames_once keeps its result in a local variable, hence both initializers
            init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

            with self._sess.as_default():
                self._sess.run(init_op)

                self._coord = tf.train.Coordinator()
                self._threads = tf.train.start_queue_runners(sess = self._sess, coord = self._coord)

                yield self._sess.run([images, labels])
            
                
if __name__ == '__main__':
    # Demo: fetch one batch of 64x64x3 cross-validating images and display it.
    # Use 'train' instead of 'xval' to look at the training records.
    pumper = TFRecordPumper()
    one_batch = next(pumper.Pump('', 'xval', img_shape = [64, 64, 3]))
    batch_images, batch_labels = one_batch

    # Iterating over the batch walks along its first axis, one image at a time
    for picture in batch_images:
        io.imshow(picture)

    io.show()

How to prepare tfrecords utilizing TensorFlow for training models

The merit of utilizing tfrecords is manifest, since a high feeding throughput obviously keeps the training iterations from starving. A precondition is that one should have the tfrecords prepared before launching the whole process. The general guidelines are easy to understand; however, since example code is scattered here and there, it's not easy to assemble the snippets into something actually workable. The following code has such an aim in mind, so I hope it's useful for everybody's work concerning deep learning. BTW, do not hesitate to provide any feedback concerning improvement of the code quality.

'''
@author: Yurui Ming (yrming@gmail.com)
'''
import numpy as np
import tensorflow as tf
import os

class TFRecordGenerator(object):
    '''
    Converts a directory of cat / dog images into training, cross-validating and
    testing tfrecords files.
    '''
    def __init__(self, params = None):
        '''
        Constructor
        Args:
            params: accepted but not used by this class
        '''
        self._graph = tf.Graph()

    def _int64_feature(self, value):
        '''Wrap a single integer into an int64 tf.train.Feature.'''
        return tf.train.Feature(int64_list = tf.train.Int64List(value = [value]))

    def _bytes_feature(self, value):
        '''Wrap a single bytes value into a bytes tf.train.Feature.'''
        return tf.train.Feature(bytes_list = tf.train.BytesList(value = [value]))

    def _label_from_key(self, image_key):
        '''Derive the label (0 for cat, 1 for dog) from the file name of an image.'''
        # Look at the base name only, so that directories such as "cats_vs_dogs"
        # do not decide the label
        file_name = os.path.basename(image_key)
        if b'cat' in file_name:
            return 0
        if b'dog' in file_name:
            return 1
        raise ValueError('Invalid file name: {}'.format(image_key))

    def _write_category(self, sess, image_key, image_data, prefix, count, base_name, split_unit):
        '''Read count images from the queue and write them as '<prefix>/label' and '<prefix>/image' examples.'''
        writer = None
        try:
            for i in range(count):
                # Start a new tfrecords file every split_unit examples
                if not i % split_unit:
                    if writer is not None:
                        writer.close()
                    writer = tf.python_io.TFRecordWriter(base_name.format(i))

                image_label, image_cont = sess.run([image_key, image_data])

                feature = {
                    prefix + '/label': self._int64_feature(self._label_from_key(image_label)),
                    prefix + '/image': self._bytes_feature(image_cont.tobytes())
                    }

                example = tf.train.Example(features = tf.train.Features(feature = feature))
                writer.write(example.SerializeToString())
        finally:
            # writer stays None when the category is empty; it is also closed when
            # an image could not be labelled or read
            if writer is not None:
                writer.close()

    def Generate(self, img_dir, img_fmt = None, img_shape = [64, 64], partition = [0.8, 0.1, 0.1],
                  train_tfrecord_base_name = 'train{}.tfrecords',
                  xval_tfrecord_base_name = 'xval{}.tfrecords',
                  test_tfrecord_base_name = 'test{}.tfrecords',
                  split_unit = 500):
        '''
        Generate
        Generate TFRecord files
        Three categories of TFRecord files will be generated, namely, training category, cross-validating category and testing category
        Args:
            img_dir: directory containing the images. The label is decided from the file name ('cat' gives 0, 'dog' gives 1)
            img_fmt: image encoding standard, 'jpg' or 'png'
            img_shape: height and width the (centrally squared) images are resized to; images are stored as decoded when empty
            partition: portions of percentage of each category, namely, training, cross-validating and testing
            train_tfrecord_base_name: base training tfrecord file name paradigm for generating training tfrecord file name
            xval_tfrecord_base_name: base cross-validating tfrecord file name paradigm for generating cross-validating tfrecord file name
            test_tfrecord_base_name: base testing tfrecord file name paradigm for generating testing tfrecord file name
            split_unit: number of accumulated tfrecords in each tfrecord file
        Raises:
            ValueError: if the image format is missing or unsupported, if partition is
                invalid, or if a file name contains neither 'cat' nor 'dog'
        '''
        # NOTE: the list defaults of img_shape and partition are only read, never mutated

        # All arguments are validated before any graph or session work is started
        if not img_fmt:
            raise ValueError('Unspecified image format')

        ptn = None
        if 'jpg' in img_fmt:
            ptn = os.path.join(img_dir, '*.jpg')
        if 'png' in img_fmt:
            ptn = os.path.join(img_dir, '*.png')
        if not ptn:
            raise ValueError('Unsupported image format')

        # A small tolerance keeps portions whose floating point sum is a hair above 1 valid
        if len(partition) < 3 or min(partition) < 0 or sum(partition) > 1 + 1e-9:
            raise ValueError('Invalid partition')

        with self._graph.as_default():
            filenames = tf.train.match_filenames_once(ptn)
            filename_queue = tf.train.string_input_producer(filenames)
            image_reader = tf.WholeFileReader()
            image_key, image_file = image_reader.read(filename_queue)

            if 'jpg' in img_fmt:
                image_data = tf.image.decode_jpeg(image_file)
            if 'png' in img_fmt:
                image_data = tf.image.decode_png(image_file)

            image_data_shape = tf.shape(image_data)

            if img_shape:
                # Crop to a square whose side is the shorter edge, then resize
                image_data = tf.cond(image_data_shape[0] > image_data_shape[1], \
                                     lambda: tf.image.resize_image_with_crop_or_pad(image_data, image_data_shape[1], image_data_shape[1]),
                                     lambda: tf.image.resize_image_with_crop_or_pad(image_data, image_data_shape[0], image_data_shape[0]))

                image_data = tf.image.resize_images(image_data, img_shape)

            image_data = tf.cast(image_data, tf.uint8)

            init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

            with tf.Session() as sess:
                sess.run(init)

                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess = sess, coord = coord)

                try:
                    num_files = len(sess.run(filenames))

                    categories = [('train', train_tfrecord_base_name),
                                  ('xval', xval_tfrecord_base_name),
                                  ('test', test_tfrecord_base_name)]

                    for portion, (prefix, base_name) in zip(partition, categories):
                        self._write_category(sess, image_key, image_data, prefix,
                                             int(portion * num_files), base_name, split_unit)
                finally:
                    # Always stop the queue runner threads, also on failure
                    coord.request_stop()
                    coord.join(threads)
            
if __name__ == '__main__':
    # Demo: convert the jpg images of a local directory, keeping every other default
    generator = TFRecordGenerator()
    generator.Generate(img_dir = 'C:\\Users\\MSUser\\Downloads\\mytest', img_fmt = 'jpg')