I have recently been learning Deep Learning from Udacity.
For 1_notmnist.ipynb, Problem 1: Let's take a peek at some of the data to make sure it looks sensible. Each exemplar should be an image of a character A through J rendered in a different font. Display a sample of the images that we just downloaded. Hint: you can use the package IPython.display.
For 1_notmnist.ipynb, Problem 1: Let's take a peek at some of the data to make sure it looks sensible. Each exemplar should be an image of a character A through J rendered in a different font. Display a sample of the images that we just downloaded. Hint: you can use the package IPython.display.
One alternative way might be the following code:
import os, fnmatch
img_files = []
def all_img_files(img_files, search_path, pattern = '*.png'):
    """Append the first file matching ``pattern`` under ``search_path``.

    Walks ``search_path`` top-down with ``os.walk`` and stops at the first
    directory that contains a file whose name matches the ``fnmatch``-style
    ``pattern``. The full path of that file is appended to ``img_files``
    (the list is modified in place). Nothing is appended when no file
    matches.
    """
    for path, subdirs, files in os.walk(search_path):
        # Look at every file name, not just files[0]: the first directory
        # entry may be a non-image file (e.g. a thumbnail cache).
        first_match = next(
            (name for name in files if fnmatch.fnmatch(name, pattern)), None)
        if first_match is not None:
            img_files.append(os.path.join(path, first_match))
            break
# Imported here so this snippet is self-contained inside the notebook cell.
from IPython.display import Image, display

# Collect one sample image from every training and test class folder.
for folder in train_folders:
    all_img_files(img_files, folder)
for folder in test_folders:
    all_img_files(img_files, folder)

# A bare `Image(...)` expression inside a loop is never rendered by the
# notebook (only the last expression of a cell is), so each image must be
# passed to display() explicitly.
for img in img_files:
    display(Image(filename = img))
However, I found it extremely slow, probably because os.walk gathers every sub-directory and file before it returns anything; the break statement has only a small effect on the overall processing time.
So I decided to write some code that genuinely fetches only the first png file in each of the A-J directories. Readers can follow the link below for reference on how the work can be done with VC++:
There's plenty of material on how to do that, namely writing an extension for Python, so the following is just source code without much explanation. I did it with Anaconda Python 3.5 and Visual C++ 2015. Other platforms will probably need some adjustment:
#include <Python.h>
#include <tchar.h>
#include <stdio.h>
#include <strsafe.h>
#include <Windows.h>
#include <Shlwapi.h>
#include "deelx.h"
#pragma comment(lib, "python35.lib")
#pragma comment(lib, "User32.lib")
#pragma comment(lib, "Shlwapi.lib")
/* Module-specific exception object. It is created in
   PyInit_get_first_matched_file and raised by get_first_matched_file
   when the directory is missing or cannot be opened. */
static PyObject *get_first_matched_file_error;
/*
 * get_first_matched_file(directory, pattern) -> str
 *
 * Enumerates the entries directly inside `directory` (no recursion) and
 * returns the name (not the full path) of the first non-directory entry
 * whose name matches the regular expression `pattern`.
 *
 * Arguments (parsed with "sz"):
 *   directory - str, path of the directory to scan.
 *   pattern   - str regular expression, or None to accept any file.
 *
 * Returns:
 *   The matching file name, or "" when no file matches.
 *
 * Raises get_first_matched_file.error when the directory does not exist,
 * the path does not fit in MAX_PATH, the directory cannot be opened, or
 * the enumeration fails part-way.
 */
static PyObject* get_first_matched_file(PyObject* self, PyObject* args)
{
    char* directoryA = NULL;
    char* patternA = NULL;

    if (!PyArg_ParseTuple(args, "sz", &directoryA, &patternA))
        return NULL;

    // Convert the UTF-8 directory name to UTF-16 for the wide Win32 APIs.
    int wchars_num = MultiByteToWideChar(CP_UTF8, 0, directoryA, -1, NULL, 0);
    if (wchars_num <= 0)
    {
        PyErr_SetString(get_first_matched_file_error, "Cannot convert directory name");
        return NULL;
    }
    wchar_t* directoryW = new wchar_t[wchars_num];
    MultiByteToWideChar(CP_UTF8, 0, directoryA, -1, directoryW, wchars_num);

    if (!PathFileExistsW(directoryW))
    {
        PyErr_SetString(get_first_matched_file_error, "Non-existing directory");
        delete[] directoryW;
        return NULL;
    }

    // Prepare string for use with FindFile functions. First, copy the
    // string to a buffer, then append '\*' to the directory name.
    // Both steps are checked so an over-long path is reported instead of
    // being silently truncated into a different search path.
    wchar_t szDir[MAX_PATH];
    HRESULT hrCopy = StringCchCopyW(szDir, MAX_PATH, directoryW);
    delete[] directoryW;
    if (FAILED(hrCopy) || FAILED(StringCchCatW(szDir, MAX_PATH, L"\\*")))
    {
        PyErr_SetString(get_first_matched_file_error, "Directory path is too long");
        return NULL;
    }

    // The "z" format maps None to a NULL pointer; treat that as
    // "every file matches" rather than feeding NULL to the converter.
    const bool matchAny = (patternA == NULL);
    wchar_t* patternW = NULL;
    if (!matchAny)
    {
        wchars_num = MultiByteToWideChar(CP_UTF8, 0, patternA, -1, NULL, 0);
        if (wchars_num <= 0)
        {
            PyErr_SetString(get_first_matched_file_error, "Cannot convert pattern");
            return NULL;
        }
        patternW = new wchar_t[wchars_num];
        MultiByteToWideChar(CP_UTF8, 0, patternA, -1, patternW, wchars_num);
    }
    // patternW stays alive until after the loop, as in the original code.
    CRegexpT <wchar_t> regexp(matchAny ? L"" : patternW);

    // Find the first file in the directory.
    WIN32_FIND_DATAW ffd;
    HANDLE hFind = FindFirstFileW(szDir, &ffd);
    if (INVALID_HANDLE_VALUE == hFind)
    {
        delete[] patternW;
        PyErr_SetString(get_first_matched_file_error, "Cannot open directory");
        return NULL;
    }

    PyObject* pyFileName = NULL;
    bool found = false;
    DWORD dwError = ERROR_SUCCESS;

    // Walk the entries until one regular file matches or the listing ends.
    for (;;)
    {
        if (!(ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
        {
            bool matched = matchAny;
            if (!matched)
            {
                MatchResult result = regexp.Match(ffd.cFileName);
                matched = (result.IsMatched() != 0);
            }
            if (matched)
            {
                // Build the Python str straight from the wide name; on
                // failure this returns NULL with a Python exception set.
                pyFileName = PyUnicode_FromWideChar(ffd.cFileName, -1);
                found = true;
                break;
            }
        }
        if (FindNextFileW(hFind, &ffd) == 0)
        {
            // Capture the error now: later calls may overwrite it.
            dwError = GetLastError();
            break;
        }
    }

    FindClose(hFind);
    delete[] patternW;

    if (found)
        return pyFileName;

    // Listing exhausted without a match: report "no file" as "".
    if (dwError == ERROR_NO_MORE_FILES)
        return Py_BuildValue("s", "");

    // Any other failure must set an exception before returning NULL.
    PyErr_SetString(get_first_matched_file_error, "Error while reading directory");
    return NULL;
}
/* Method table of the module: exposes the C function
   get_first_matched_file to Python under the same name, called with a
   positional-argument tuple (METH_VARARGS). */
static PyMethodDef get_first_matched_file_method[] = {
{
"get_first_matched_file", get_first_matched_file,
METH_VARARGS, "Get the first file given directory and pattern"
},
{NULL, NULL, 0, NULL} /* Sentinel */
};
/* Module definition passed to PyModule_Create in
   PyInit_get_first_matched_file; it ties the module name and docstring
   to the method table get_first_matched_file_method. */
static struct PyModuleDef get_first_matched_file_module =
{
PyModuleDef_HEAD_INIT,
"get_first_matched_file", /* name of module */
"Get the first file given directory and pattern", /* module documentation, may be NULL */
-1, /* size of per-interpreter state of the module, or -1 if the module keeps state in global variables. */
get_first_matched_file_method
};
/*
 * Module initialisation entry point, called by Python on
 * `import get_first_matched_file`.
 *
 * Creates the module object, creates the module's exception type
 * (exposed as `get_first_matched_file.error`) and attaches it to the
 * module. Returns the new module, or NULL with a Python exception set if
 * any step fails.
 */
PyMODINIT_FUNC PyInit_get_first_matched_file(void)
{
    PyObject *m = PyModule_Create(&get_first_matched_file_module);
    if (m == NULL)
        return NULL;

    get_first_matched_file_error = PyErr_NewException("get_first_matched_file.error", NULL, NULL);
    if (get_first_matched_file_error == NULL)
    {
        Py_DECREF(m);
        return NULL;
    }

    /* One reference is kept by the global variable; the extra one taken
       here is the reference PyModule_AddObject steals on success. */
    Py_INCREF(get_first_matched_file_error);
    if (PyModule_AddObject(m, "error", get_first_matched_file_error) < 0)
    {
        /* On failure the reference was NOT stolen: drop it, then drop
           the global's own reference and the half-built module. */
        Py_DECREF(get_first_matched_file_error);
        Py_CLEAR(get_first_matched_file_error);
        Py_DECREF(m);
        return NULL;
    }
    return m;
}
There's only one dependency, namely deelx.h; see the website below:
A testing script is as follows:
# Make the freshly built extension importable, then load it. The import
# must come after the path is extended.
import sys
sys.path.append("C:\\Users\\MS User\\Documents\\Visual Studio 2015\\Projects\\PythonExtensions\\x64\\Release")
import get_first_matched_file

# Directory to scan and the regular expression a file name must match.
letter_dir = "C:\\tensorflow\\tensorflow\\examples\\udacity\\notMNIST_large\\B"
png_regex = "\\.png$"

# Ask the extension for the first matching file name and show it.
first_png = get_first_matched_file.get_first_matched_file(letter_dir, png_regex)
print(first_png)
Enjoy python, enjoy learning from Udacity.
Project setting: