MobileNets are a family of neural network architectures aimed at image classification. They're known to be small and fast while still achieving good classification accuracy compared with more sophisticated architectures.
This document introduces MicroMobileNet, a Python package that gives you the possibility to train MobileNets in Python and then export them to plain C++, for use with Arduino, PlatformIO or ESP-IDF frameworks.
Install the dependencies
MicroMobileNet is based on Keras and Tensorflow.
pip install keras tensorflow Jinja2 scikit-learn Pillow cached_property
pip install micromobilenet
Prepare data
MicroMobileNet only works on grayscale, 96x96 images. If you have color images, you have to convert them to grayscale. If you have smaller/larger images, you have to resize/crop them.
The code sample below shows how to generate the training dataset assuming you have a folder structure that looks like the one depicted here.
DATA
|-- train
|---- apple
|------ ...list of images...
|---- orange
|------ ...list of images...
|---- banana
|------ ...list of images...
|-- val
|---- apple
|------ ...list of images...
|---- orange
|------ ...list of images...
|---- banana
|------ ...list of images...
|-- test
|---- apple
|------ ...list of images...
|---- orange
|------ ...list of images...
|---- banana
|------ ...list of images...
"""
Load images from folders
"""
import os
import numpy as np
from os import listdir
from glob import glob
from PIL import Image
def load_folder(folder: str):
    """
    Yield every JPEG image found directly inside a folder.

    Files matching ``*.jpg`` and ``*.jpeg`` are visited in sorted path order;
    each one is converted to grayscale and scaled to floats in [0, 1].

    :param folder: path of the folder to scan
    :return: generator of float arrays, one per image
    """
    image_paths = glob(f"{folder}/*.jpg") + glob(f"{folder}/*.jpeg")
    image_paths.sort()
    for image_path in image_paths:
        grayscale = Image.open(image_path).convert("L")
        yield np.asarray(grayscale, dtype=float) / 255.
def load_split(root: str, split_name: str):
    """
    Load one dataset split (e.g. train/val/test) as shuffled arrays.

    Every sub-folder of ``root/split_name`` is treated as one class; classes
    are numbered by the sorted order of their folder paths.

    :param root: dataset root folder
    :param split_name: name of the split sub-folder
    :return: tuple (X, Y) of images and integer labels, shuffled together
    """
    split_dir = f"{root}/{split_name}"
    class_dirs = sorted(
        f"{split_dir}/{entry}"
        for entry in listdir(split_dir)
        if os.path.isdir(f"{split_dir}/{entry}")
    )
    images = []
    labels = []
    for label, class_dir in enumerate(class_dirs):
        class_images = list(load_folder(class_dir))
        images.extend(class_images)
        labels.extend([label] * len(class_images))
    # one permutation applied to both arrays keeps images and labels aligned
    order = np.random.permutation(len(images))
    return np.asarray(images)[order], np.asarray(labels)[order]
if __name__ == '__main__':
    # imported here so the data-loading snippet stays self-contained
    from keras.utils import to_categorical

    train_x, train_y = load_split("DATA", "train")
    val_x, val_y = load_split("DATA", "val")
    test_x, test_y = load_split("DATA", "test")
    print(train_x.shape)
    print(val_x.shape)
    print(test_x.shape)
    # load_split returns integer class indices; the training step uses
    # categorical_crossentropy and the evaluation step calls
    # test_y.argmax(axis=1), both of which need one-hot encoded labels
    train_y = to_categorical(train_y)
    val_y = to_categorical(val_y)
    test_y = to_categorical(test_y)
Instantiate and Train
First of all, you start by instantiating the MobileNet variation you prefer (refer to the section Architectures for the list of available configurations and their size).
from micromobilenet import PicoMobileNet
# replace num_classes with the actual number of classes
# (load_split assigns one label per class folder, so count the folders in DATA/train)
net = PicoMobileNet(num_classes=10)
Then, you configure all the parameters required for training.
# optimisation settings
net.config.learning_rate = 0.01
net.config.batch_size = 32
net.config.verbosity = 1
# NOTE(review): this loss expects one-hot encoded labels; the full script
# below encodes them with keras.utils.to_categorical
net.config.loss = "categorical_crossentropy"
net.config.metrics = ["categorical_accuracy"]
# presumably where training checkpoints are written - confirm against the package docs
net.config.checkpoint_path = "./checkpoints/pico"
Finally, it is time to train the network on your data.
# build and compile the network with the configuration set above, then train
net.build()
net.compile()
# the validation split is passed alongside the training split
net.fit(train_x, train_y, val_x, val_y, epochs=30)
To evaluate the accuracy on the test set:
from sklearn.metrics import classification_report
predictions = net.predict(test_x)
# argmax(axis=1) turns each row into a class index, so test_y must be
# two-dimensional (one-hot) here; predictions are assumed to hold one score per class
print(classification_report(test_y.argmax(axis=1), predictions.argmax(axis=1)))
Convert to C++
After you have your model trained and tested, it is time to export it to C++.
# write the generated C++ source to a header file the sketch can #include
with open("MobileNet.h", "w") as file:
    file.write(net.convert.to_cpp())
Generated code will look like the following.
/**
* "Compiled" implementation of modified MobileNet
*/
class PicoMobileNet {
public:
const uint16_t numInputs = 9216;
const uint16_t numOutputs = 4;
float outputs[4];
float arena[6936];
uint16_t output;
float proba;
/**
*
*/
MobileNet() : output(0), proba(0) {
for (uint16_t i = 0; i < numOutputs; i++)
outputs[i] = 0;
}
/**
 * Run the whole network on one image and return the predicted class.
 *
 * Intermediate feature maps bounce between the two halves of `arena`
 * ("ping" and "pong", 3468 floats each); 3468 = 34 * 34 * 3 is the size of
 * the largest buffer written below (the padded output of the first layer).
 * Class probabilities are left in `outputs`; `output` and `proba` are
 * updated by argmax().
 *
 * @param input single-channel image, read as 96 floats per row
 *              (numInputs = 9216 = 96 * 96)
 * @return index of the class with the highest probability
 */
uint16_t predict(float *input) {
    float *ping = arena;
    float *pong = arena + 3468;
    // conv2d (0): 96x96 input -> 3 maps of 32x32 (3x3 kernel, stride 3)
    for (int16_t d = 0; d < 3; d++)
        this->conv2d_3x3x1(input, ping + 32 * 32 * d, conv2d_0_weights[d], 96, 3);
    // padding (1): 32x32 -> 34x34 per map; pad() writes into pong, so the
    // result is copied back for the next layer to read from ping
    for (int16_t d = 0; d < 3; d++)
        this->pad(ping + 32 * 32 * d, pong + 34 * 34 * d, 32);
    memcpy(ping, pong, sizeof(float) * 34 * 34 * 3);
    // depthwise (1): 34x34 -> 16x16 per map (stride 2)
    for (int16_t d = 0; d < 3; d++)
        this->depthwise_conv(ping + 34 * 34 * d, pong + 16 * 16 * d, depthwise_1_weights[d], 34, 2);
    // pointwise (1): 3 maps -> 6 maps of 16x16
    for (int16_t d = 0; d < 6; d++)
        this->pointwise_conv(pong, ping + 16 * 16 * d, pointwise_1_weights[d], 16, 3);
    // padding (2): 16x16 -> 18x18 per map
    for (int16_t d = 0; d < 6; d++)
        this->pad(ping + 16 * 16 * d, pong + 18 * 18 * d, 16);
    memcpy(ping, pong, sizeof(float) * 18 * 18 * 6);
    // depthwise (2): 18x18 -> 8x8 per map (stride 2)
    for (int16_t d = 0; d < 6; d++)
        this->depthwise_conv(ping + 18 * 18 * d, pong + 8 * 8 * d, depthwise_2_weights[d], 18, 2);
    // pointwise (2): 6 maps -> 12 maps of 8x8
    for (int16_t d = 0; d < 12; d++)
        this->pointwise_conv(pong, ping + 8 * 8 * d, pointwise_2_weights[d], 8, 6);
    // padding (3): 8x8 -> 10x10 per map
    for (int16_t d = 0; d < 12; d++)
        this->pad(ping + 8 * 8 * d, pong + 10 * 10 * d, 8);
    memcpy(ping, pong, sizeof(float) * 10 * 10 * 12);
    // depthwise (3): 10x10 -> 4x4 per map (stride 2)
    for (int16_t d = 0; d < 12; d++)
        this->depthwise_conv(ping + 10 * 10 * d, pong + 4 * 4 * d, depthwise_3_weights[d], 10, 2);
    // pointwise (3): 12 maps -> 24 maps of 4x4
    for (int16_t d = 0; d < 24; d++)
        this->pointwise_conv(pong, ping + 4 * 4 * d, pointwise_3_weights[d], 4, 12);
    // global max pooling: 24 maps of 4x4 -> 24 values in pong
    this->maxpool(ping, pong, 4, 24);
    // final layer: one weighted sum (plus bias) per class, written to ping[d]
    for (uint16_t d = 0; d < numOutputs; d++)
        this->dot(pong, ping + d, conv2d_last_weights[d], conv2d_last_bias[d], 24);
    // turn the per-class sums into probabilities, then pick the best class
    this->softmax(ping, outputs, numOutputs);
    return this->argmax();
}
/**
 * Find the class with the highest score in `outputs`.
 *
 * Stores the winning index in `output` and its score in `proba`; on ties
 * the lowest index wins.
 *
 * @return index of the highest score
 */
uint16_t argmax() {
    uint16_t best = 0;
    for (uint16_t k = 1; k < numOutputs; k++)
        if (outputs[k] > outputs[best])
            best = k;
    this->output = best;
    this->proba = outputs[best];
    return this->output;
}
protected:
const float conv2d_0_weights[3][9] = {{-0.41666251421, -0.11826507002, -0.23049902916, 0.17382600904, -0.55161094666, 0.10363399237, -0.42671826482, -0.12638387084, -0.52779608965},
{0.31084173918, -0.04917881638, 0.40228804946, -0.18794929981, -0.02449743077, 0.03094954416, 0.44947320223, 0.16917343438, -0.30685693026},
{-0.08407333493, 0.57998371124, 0.34822252393, 0.56157398224, 0.24396187067, 0.32406872511, 0.18443229795, 0.53056180477, 0.13178956509}};
const float depthwise_1_weights[3][9] = {{-0.35425487161, -0.23616252840, -0.22997750342, 0.24456115067, -0.20624010265, -0.00013742072, 0.26879012585, -0.26807692647, 0.01581084728},
{0.07073932886, 0.48199057579, -0.10555473715, -0.19673484564, -0.22651126981, -0.01001045667, 0.37793800235, -0.19346579909, -0.32536745071},
{-0.28408280015, -0.47497844696, -0.11035950482, 0.36142480373, 0.21333804727, 0.31504249573, 0.41812920570, -0.31300476193, 0.04843502119}};
const float pointwise_1_weights[6][3] = {{-0.09117162228, 0.82249736786, -0.20695370436},
{-0.85948276520, 0.19161477685, 1.10251903534},
{-0.71232938766, -0.03556078672, -0.08283454180},
{0.46851244569, 0.76725500822, 0.50938290358},
{-0.25427860022, -0.72349339724, -0.79621297121},
{-0.47162520885, 0.25396701694, -0.49285510182}};
const float depthwise_2_weights[6][9] = {{0.35895720124, 0.23784333467, -0.07534114271, -0.24524940550, -0.04549394548, -0.26059886813, 0.28477019072, -0.00712140510, -0.27584657073},
{0.51091092825, 0.62111812830, -0.05044540763, 0.52927201986, 0.25487384200, 0.33370712399, 0.18966604769, 0.11532534659, 0.16275504231},
{0.29592916369, -0.15394289792, 0.12004908919, 0.07607766986, 0.24132254720, -0.20747616887, -0.04176491499, -0.27908378839, 0.13809236884},
{0.02603448555, 0.25732424855, -0.36077418923, 0.07963499427, -0.01887336187, -0.33278352022, 0.24708244205, -0.19360905886, 0.13639056683},
{0.21229699254, 0.00662785769, -0.19093543291, -0.09667769074, -0.05552545190, 0.01491189003, -0.20547568798, 0.27846303582, -0.03822237253},
{-0.06207026541, 0.15863336623, -0.16388191283, 0.15762025118, -0.19919943810, -0.07231600583, -0.13571833074, -0.02249273658, -0.21008148789}};
const float pointwise_2_weights[12][6] = {{0.00063503534, 0.48058623075, -0.36563238502, 0.51302105188, 0.21660172939, -0.32831248641},
{-0.50620383024, -0.35271933675, 0.48594498634, -0.50641745329, 0.35882031918, 0.22168761492},
{-0.43700292706, -0.38995116949, -0.53454947472, -0.06846392155, -0.13144075871, 0.07794147730},
{0.37599769235, -0.43717575073, 0.38039886951, -0.55730378628, 0.38651198149, -0.11370623112},
{-0.02578501403, -0.49355089664, 0.18120104074, 0.10755521804, -0.52842414379, 0.23777391016},
{0.32847705483, -0.22344671190, 0.04166835546, 0.46865811944, 0.56128060818, -0.33481889963},
{-0.31596237421, -0.04046857357, 0.37702673674, -0.46384748816, 0.48273956776, -0.26779732108},
{0.49115952849, -0.01721261069, 0.42422366142, -0.06754394621, 0.39422857761, 0.13803933561},
{-0.10960361362, 0.57471114397, -0.27886447310, 0.14179545641, -0.08440622687, -0.20258137584},
{0.35399246216, 0.33130452037, 0.31853711605, 0.43343657255, -0.34315919876, 0.11230981350},
{0.31758838892, 0.80079740286, -0.42914772034, 0.01409940235, -0.23797056079, -0.39399114251},
{-0.22277000546, -0.39749902487, 0.18530112505, -0.02303826809, -0.17399805784, -0.52532655001}};
const float depthwise_3_weights[12][9] = {{0.43468713760, 0.27347934246, 0.26273813844, -0.04167792201, 0.16433015466, -0.12557785213, 0.50365906954, 0.19630661607, 0.48740977049},
{0.04266124964, -0.16427403688, 0.08567957580, 0.17073456943, 0.21095471084, -0.03015229106, -0.01741993427, -0.11063526571, 0.06490472704},
{0.18609096110, 0.04467250407, -0.16391515732, -0.18312613666, 0.01121036988, 0.10224457085, 0.13521514833, -0.10507968068, 0.08436971158},
{0.15235303342, -0.04396350682, 0.13825593889, -0.05687613785, 0.11330153048, -0.20187327266, -0.00074075162, 0.04703579471, -0.15467861295},
{0.00457271701, 0.24002927542, 0.04937255010, 0.10910945386, 0.19974878430, 0.10104069859, 0.06970567256, 0.12173694372, -0.26547023654},
{-0.07644123584, -0.21463918686, 0.02911583148, -0.18811574578, 0.09112719446, 0.11265213788, -0.01146453898, -0.00454099616, -0.14186237752},
{0.06569774449, -0.02157643437, 0.19389937818, 0.19328580797, 0.14999692142, 0.15664963424, -0.02148352563, 0.22635035217, -0.17734022439},
{0.10683796555, 0.05706703290, 0.21315959096, -0.05024305359, 0.19207747281, 0.00748820370, 0.01584845968, 0.21641007066, 0.21460226178},
{-0.19390374422, -0.14212438464, -0.23052616417, -0.07321915030, 0.27654990554, 0.06163945794, -0.16758939624, 0.33695048094, 0.07404905558},
{-0.22942769527, -0.17088742554, 0.02193632536, 0.35620847344, 0.20238873363, 0.31436139345, -0.34888097644, -0.39686203003, -0.25719842315},
{0.32601067424, 0.25143274665, 0.26007786393, 0.03177319467, 0.19906532764, -0.17288939655, 0.37424448133, 0.12854650617, 0.31418201327},
{-0.15625628829, 0.01465466619, 0.05003134906, 0.09593714774, 0.12668652833, -0.22036601603, 0.09534128010, -0.10707643628, 0.18672065437}};
const float pointwise_3_weights[24][12] = {{-0.33242326975, 0.32112824917, -0.34323650599, -0.19911736250, 0.16705293953, 0.01022295561, -0.30409169197, 0.01617346518, -0.12629058957, 0.02393108793, 0.16568218172, 0.10778385401},
{0.45878762007, -0.34416934848, -0.18845045567, -0.29416739941, 0.03930538893, 0.00911496207, -0.26870962977, 0.33837494254, -0.22767841816, -0.06902970374, 0.72774738073, -0.07464709878},
{-0.15224465728, 0.25462841988, 0.16669172049, -0.11165502667, -0.40522453189, -0.29562169313, 0.07739520073, -0.31246533990, -0.39020511508, -0.39069032669, -0.28850054741, 0.08244985342},
{0.36490887403, -0.08350038528, 0.36102640629, -0.15604324639, -0.29153943062, 0.00717086066, -0.10952049494, -0.08671616763, -0.32154598832, 0.00325861014, 0.62585133314, -0.19151093066},
{0.33634936810, 0.09808677435, 0.00602692366, 0.03131084517, 0.28147563338, 0.38172629476, -0.10785871744, 0.14068019390, -0.25693386793, -0.10714749992, 0.56367319822, 0.39001709223},
{0.61772930622, 0.13471519947, 0.04854518548, -0.00608583074, -0.01802019961, -0.37180465460, -0.00401133299, 0.33786326647, -0.12424690276, -0.28323772550, 0.39886355400, -0.29021489620},
{0.04374134541, -0.37529510260, 0.06371120363, 0.04935376719, 0.02217261866, 0.30530115962, -0.23669055104, 0.06503948569, 0.01679839753, 0.48886525631, -0.27314275503, -0.34578859806},
{-0.01946349815, -0.22470764816, 0.09743384272, -0.02405748889, 0.29618066549, -0.25876462460, -0.34475395083, 0.21627385914, -0.10402329266, 0.19197782874, -0.42406490445, 0.25275290012},
{0.63850814104, -0.23805379868, 0.27066165209, 0.25643539429, 0.37469926476, -0.11610201001, -0.29154029489, -0.02091231011, -0.19745121896, -0.16873377562, 0.24683740735, 0.06828689575},
{-0.13936965168, -0.23779028654, -0.39054390788, -0.16376510262, -0.08882397413, 0.41016262770, 0.18596971035, -0.25503417850, 0.36027094722, -0.28058910370, -0.23572920263, -0.37204989791},
{0.40478792787, 0.18747282028, 0.32068753242, 0.29423406720, 0.36582773924, 0.20912985504, -0.40184441209, 0.28225576878, -0.12299652398, -0.08685125411, 0.47822484374, -0.04130911827},
{0.03607089445, 0.18253159523, 0.33984237909, 0.10524179041, -0.24410746992, 0.09393396229, -0.19550697505, -0.38074532151, 0.29552531242, 0.45382964611, -0.38232478499, 0.19928234816},
{-0.36115184426, -0.03652039170, 0.13133662939, 0.12104801089, 0.02864059806, 0.13996993005, 0.27846491337, -0.33686831594, 0.00807717629, 0.44147452712, 0.10711596906, 0.24524736404},
{-0.47024169564, 0.03156501055, -0.07844874263, -0.39789190888, 0.19768652320, 0.18511816859, 0.37028717995, 0.14206841588, 0.42253586650, 0.48465278745, 0.15217860043, 0.29450023174},
{0.25448688865, 0.04511585832, 0.28480070829, -0.01936831698, 0.14015029371, 0.14111196995, -0.12783417106, 0.19556348026, 0.01343316026, -0.15595127642, 0.42841988802, 0.18192327023},
{-0.14106854796, 0.30786058307, 0.01031529903, 0.26337873936, 0.15846376121, 0.28183579445, 0.10774451494, -0.32656732202, 0.35945361853, -0.24068254232, -0.35250753164, 0.21529477835},
{0.61793971062, -0.02668470144, 0.32367497683, -0.38035601377, -0.07861428708, 0.39642265439, -0.29068851471, 0.15355134010, 0.11719731987, -0.24253894389, 0.65259689093, -0.14480277896},
{0.32112109661, -0.17774070799, 0.35220271349, -0.19650655985, 0.04403254017, -0.16583316028, 0.01203846931, 0.21680326760, 0.06325855851, -0.06204054505, -0.15532730520, -0.16143210232},
{0.18882560730, -0.23018650711, 0.23278686404, -0.09548929334, 0.36104604602, -0.22795256972, -0.32385107875, -0.24018083513, -0.30034396052, -0.00216405559, -0.14133879542, 0.02028653026},
{0.37537524104, -0.29309490323, -0.00938728452, -0.29328131676, 0.34983760118, -0.09490379691, 0.35412961245, -0.01121471357, 0.32010972500, -0.30494660139, 0.42134764791, -0.16008812189},
{-0.64491266012, 0.26568508148, -0.12225946784, 0.18564531207, 0.28982475400, -0.35740262270, 0.25998938084, 0.23804895580, 0.10513759404, 0.42896071076, 0.20906865597, -0.34997045994},
{-0.20663002133, -0.14476191998, 0.06242525578, -0.25408589840, -0.16947199404, -0.19733618200, -0.00828516483, -0.14818486571, -0.18266969919, -0.00224149227, -0.38471829891, 0.28299516439},
{0.13125912845, -0.39106506109, -0.03080397844, 0.24858620763, 0.02611339465, 0.11944877356, -0.20792202652, -0.11530396342, 0.22887417674, -0.09155285358, -0.18884035945, 0.32079821825},
{0.15044319630, -0.14401194453, -0.18125914037, -0.28057807684, 0.08085042238, -0.18810100853, -0.27874565125, -0.07419086248, 0.22713482380, -0.21203750372, 0.15332724154, 0.09756356478}};
const float conv2d_last_weights[4][24] = {{0.18572680652, 0.10660002381, 0.34570229053, -0.13073261082, 0.04635358602, 0.40537646413, -0.42621853948, -0.19123505056, -0.13323357701, -0.08411549777, -0.28852567077, -0.40656703711, -0.04649058729, -0.26070696115, 0.37145623565, -0.20005775988, 0.16340047121, -0.51885360479, -0.43397870660, -0.09329127520, -0.35178303719, -0.18354952335, -0.06395643204, -0.22809614241},
{0.31888130307, 0.52352815866, 0.04355543852, 0.25176772475, 0.62861680984, 0.51203912497, -0.19495809078, -0.17149986327, 0.43246814609, 0.20671278238, 0.51400816441, 0.10310800374, -0.10653696954, -0.14129965007, -0.20243576169, -0.25948125124, 0.46354496479, 0.20531697571, -0.29653131962, 0.35552129149, -0.37847694755, -0.20610806346, -0.23213934898, 0.28872734308},
{-0.10687508434, -0.63240849972, 0.11593413353, -0.77483934164, -0.24154321849, -0.78893947601, 0.58301407099, 0.12191224843, -0.38278028369, -0.57541137934, -0.58911901712, 0.58145040274, 0.60270369053, 0.19607372582, -0.78608351946, -0.53846079111, -0.15513655543, -0.04214842990, -0.14708326757, -0.82148188353, 0.37131208181, 0.31574213505, -0.44903215766, -0.35200369358},
{-0.44866114855, 0.28538855910, -0.26954653859, 0.23931820691, -0.21181413531, -0.25706809759, -0.52325546741, 0.09330182523, 0.05173591897, 0.17546258867, 0.26352429390, -0.04025872424, -0.25341644883, -0.26888474822, 0.21390053630, -0.05746645853, 0.48053106666, 0.40644150972, 0.51892417669, 0.22470143437, -0.40383216739, 0.06014847755, 0.10987602919, 0.12020519376}};
const float conv2d_last_bias[4] = {0.03802097589, -0.09491597861, 0.07504304498, -0.01769104972};
/**
 * Weighted sum of one 3x3 image patch with a 3x3 kernel.
 *
 * @param inputs pointer to the top-left pixel of the patch
 * @param kernel 9 kernel weights, row by row
 * @param width number of floats per image row (distance between patch rows)
 * @return sum of patch[i] * kernel[i] over the 9 positions
 */
inline float mult3x3(float *inputs, const float kernel[9], const uint16_t width) {
    const float *top = inputs;
    const float *middle = top + width;
    const float *bottom = middle + width;
    // accumulate in the same left-to-right order as a single summed expression
    float acc = top[0] * kernel[0];
    acc += top[1] * kernel[1];
    acc += top[2] * kernel[2];
    acc += middle[0] * kernel[3];
    acc += middle[1] * kernel[4];
    acc += middle[2] * kernel[5];
    acc += bottom[0] * kernel[6];
    acc += bottom[1] * kernel[7];
    acc += bottom[2] * kernel[8];
    return acc;
}
/**
 * Surround a square image with a one-pixel border of zeros.
 *
 * @param inputs source image, width * width floats in row-major order
 * @param outputs destination, (width + 2) * (width + 2) floats
 * @param width side length of the source image
 */
void pad(float *inputs, float *outputs, uint16_t width) {
    const uint16_t paddedWidth = width + 2;
    const float *src = inputs;
    float *dst = outputs;
    // top border
    for (uint16_t col = 0; col < paddedWidth; col++)
        *dst++ = 0;
    // each source row gets a zero on its left and on its right
    for (uint16_t row = 0; row < width; row++) {
        *dst++ = 0;
        for (uint16_t col = 0; col < width; col++)
            *dst++ = *src++;
        *dst++ = 0;
    }
    // bottom border
    for (uint16_t col = 0; col < paddedWidth; col++)
        *dst++ = 0;
}
/**
 * 3x3 convolution over one square, single-channel image (no activation).
 *
 * The kernel is applied at every `stride`-th row and column where a full
 * 3x3 window fits; results are written to `output` in row-major order.
 *
 * @param input source image, `width` floats per row
 * @param output destination buffer
 * @param kernel 9 kernel weights, row by row
 * @param width side length of the source image
 * @param stride step between consecutive windows, in pixels
 */
void conv2d_3x3x1(float *input, float *output, const float *kernel, const uint16_t width, uint8_t stride) {
    float *out = output;
    for (uint16_t row = 0; row <= width - 3; row += stride) {
        const uint16_t rowStart = row * width;
        for (uint16_t col = 0; col <= width - 3; col += stride)
            *out++ = this->mult3x3(input + rowStart + col, kernel, width);
    }
}
/**
 * 3x3 convolution over one square channel, followed by a clamp to [0, 6].
 *
 * The kernel is applied at every `stride`-th row and column where a full
 * 3x3 window fits; results are written to `outputs` in row-major order.
 *
 * @param inputs source channel, `width` floats per row
 * @param outputs destination buffer
 * @param kernel 9 kernel weights, row by row
 * @param width side length of the source channel
 * @param stride step between consecutive windows, in pixels
 */
void depthwise_conv(float *inputs, float *outputs, const float *kernel, const uint16_t width, uint8_t stride) {
    float *out = outputs;
    for (uint16_t row = 0; row <= width - 3; row += stride) {
        const uint16_t rowStart = row * width;
        for (uint16_t col = 0; col <= width - 3; col += stride) {
            const float raw = this->mult3x3(inputs + rowStart + col, kernel, width);
            // negative values become 0, values above 6 become 6
            *out++ = (raw < 0) ? 0 : ((raw > 6) ? 6 : raw);
        }
    }
}
/**
 * 1x1 convolution across channels, followed by a clamp to [0, 6].
 *
 * For every pixel, the values of all input channels at that pixel are
 * combined into one weighted sum, producing a single output channel.
 *
 * @param inputs `channels` planes of width * width floats, stored one after another
 * @param outputs destination plane, width * width floats
 * @param kernel one weight per input channel
 * @param width side length of each plane
 * @param channels number of input planes
 */
void pointwise_conv(float *inputs, float *outputs, const float *kernel, const uint16_t width, const uint16_t channels) {
    const uint16_t planeSize = width * width;
    float *out = outputs;
    for (uint16_t row = 0; row < width; row++) {
        const uint16_t rowStart = row * width;
        for (uint16_t col = 0; col < width; col++) {
            float acc = 0;
            for (uint16_t ch = 0; ch < channels; ch++)
                acc += inputs[(rowStart + col) + planeSize * ch] * kernel[ch];
            // negative values become 0, values above 6 become 6
            *out++ = (acc < 0) ? 0 : ((acc > 6) ? 6 : acc);
        }
    }
}
/**
 * Global max pooling: reduce every channel to its largest value.
 *
 * @param inputs `channels` planes of width * width floats, stored one after another
 * @param outputs destination, one float per channel
 * @param width side length of each plane
 * @param channels number of planes
 */
void maxpool(float *inputs, float *outputs, const uint16_t width, const uint16_t channels) {
    const uint16_t planeSize = width * width;
    for (uint16_t ch = 0; ch < channels; ch++) {
        const uint16_t start = planeSize * ch;
        const float *plane = inputs + start;
        float best = plane[0];
        for (uint16_t k = 1; k < planeSize; k++)
            best = (plane[k] > best) ? plane[k] : best;
        outputs[ch] = best;
    }
}
/**
 * Dot product of two vectors plus a bias (no activation is applied).
 *
 * @param inputs input vector, `length` floats
 * @param outputs destination; the result is stored in outputs[0]
 * @param weights weight vector, `length` floats
 * @param bias value added to the dot product
 * @param length number of elements in `inputs` and `weights`
 */
void dot(float *inputs, float *outputs, const float *weights, const float bias, const uint16_t length) {
    const float *in = inputs;
    const float *w = weights;
    float acc = 0;
    for (uint16_t left = length; left > 0; left--)
        acc += *in++ * *w++;
    *outputs = acc + bias;
}
/**
 * Softmax activation (numerically stable).
 *
 * The largest input is subtracted from every input before exponentiation.
 * This leaves the result mathematically unchanged but keeps exp() from
 * overflowing to infinity (and the division from producing NaN) when the
 * inputs are large.
 *
 * @param inputs raw scores, `numOutputs` floats
 * @param outputs destination for the probabilities, `numOutputs` floats
 * @param numOutputs number of scores; nothing is written when it is 0
 */
void softmax(float *inputs, float *outputs, uint16_t numOutputs) {
    if (numOutputs == 0)
        return;
    float peak = inputs[0];
    for (uint16_t i = 1; i < numOutputs; i++)
        if (inputs[i] > peak)
            peak = inputs[i];
    float sum = 0;
    for (uint16_t i = 0; i < numOutputs; i++) {
        // the exponent is always <= 0, so e is in (0, 1] and sum is >= 1
        const float e = exp(inputs[i] - peak);
        outputs[i] = e;
        sum += e;
    }
    for (uint16_t i = 0; i < numOutputs; i++)
        outputs[i] /= sum;
}
};
This is a fully self-contained, statically allocated class that implements the MobileNet variation of choice. It doesn't require external runtimes to run, doesn't require a TENSOR_ARENA_SIZE
to be defined beforehand, and doesn't throw cryptic errors during compilation or execution.
It is written as plain C++ and doesn't contain any vendor-specific optimizations at the moment (e.g. CMSIS for ARM Cortex chipsets). They will be added in future versions, if demand supports the effort.
Use with Arduino
To use the network with Arduino (or any other framework, actually), you need to just include the generated class and call predict()
on the instance.
// sample image is a float[96 * 96] array
#include "sample_image.h"
#include "MobileNet.h"
MobileNet net;
void setup() {
    // open the serial port and announce the demo;
    // the network itself needs no initialisation
    const unsigned long baudRate = 115200;
    Serial.begin(baudRate);
    Serial.println("MobileNet demo");
}
void loop() {
    // micros() returns unsigned long; size_t is only 16 bits wide on AVR
    // boards and would truncate the timestamp
    const unsigned long start = micros();
    net.predict(sample_image);
    // take the end timestamp right after predict() so the Serial output
    // below is not counted as inference time
    const unsigned long elapsed = micros() - start;
    Serial.print("Predicted output = ");
    Serial.println(net.output);
    Serial.print("It took ");
    Serial.print(elapsed);
    Serial.println(" us to run MobileNet");
    delay(2000);
}
Benchmarks
How small are these MobileNet variations? How fast do they run? Making an extensive test would take a lot of time, so I ran most of the experiments on an ESP32S3 board and a couple more on an Arduino Nano 33 BLE Sense (ARM Cortex M4) and Arduino Portenta H7 (ARM Cortex M7).
Benchmarks for ESP32S3
┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃ Architecture       ┃ Program space (kb) ┃ RAM (kb)   ┃ Execution time (us) ┃
┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ Pico               │ 4,52               │ 30,55      │ 2832                │
├────────────────────┼────────────────────┼────────────┼─────────────────────┤
│ Nano               │ 8,54               │ 64,23      │ 6543                │
├────────────────────┼────────────────────┼────────────┼─────────────────────┤
│ Micro              │ 19,75              │ 132,36     │ 31987               │
├────────────────────┼────────────────────┼────────────┼─────────────────────┤
│ Milli              │ 49,70              │ 162,12     │ 37641               │
├────────────────────┼────────────────────┼────────────┼─────────────────────┤
│ Base               │ 123,60             │ 235,47     │ 53944               │
└────────────────────┴────────────────────┴────────────┴─────────────────────┘
Benchmarks for Arduino Nano 33 BLE Sense (Cortex M4)
┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃ Architecture       ┃ Program space (kb) ┃ RAM (kb)   ┃ Execution time (us) ┃
┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ Pico               │ 5,42               │ 31,17      │ 10969               │
├────────────────────┼────────────────────┼────────────┼─────────────────────┤
│ Nano               │ 9,27               │ 64,22      │ 25169               │
├────────────────────┼────────────────────┼────────────┼─────────────────────┤
│ Micro              │ 19,75              │ 132,36     │ 123669              │
└────────────────────┴────────────────────┴────────────┴─────────────────────┘
Benchmarks for Portenta H7 (Cortex M7)
┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓
┃ Architecture       ┃ Execution time (us)  ┃
┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━┩
│ Nano               │ 3000                 │
├────────────────────┼──────────────────────┤
│ Base               │ 25357                │
└────────────────────┴──────────────────────┘
Regarding the execution speed, you can see that on the widely available ESP32S3 chip, the mid-sized Micro network runs in 32 ms. This should allow real-time camera frame processing in most vision-based projects.
The Arduino Portenta H7 runs roughly twice as fast as the ESP32S3. The Arduino Nano BLE Sense is much slower at every scale.
Accuracy
Ok, these variations of MobileNet are small and fast. But are they accurate enough? It depends on your dataset. These networks are meant to be trained from scratch on your data and the results may vary. I was able to achieve >90% accuracy on a 10 textures classification project and 75% accuracy on a subset of Fashion MNIST training PicoMobileNet for 20 epochs, so good results are totally achievable.
If you find that on your dataset you can't get satisfactory results, then you should explore alternatives (e.g. EdgeImpulse, that uses the full version of MobileNetV1 with transfer learning).
Entire Python script
import os.path
from collections import Counter
import numpy as np
from os import listdir
from glob import glob
from PIL import Image
from keras.utils import to_categorical
from sklearn.metrics import classification_report
from micromobilenet import PicoMobileNet
def load_folder(folder: str):
    """
    Yield every JPEG image found directly inside a folder.

    Files matching ``*.jpg`` and ``*.jpeg`` are visited in sorted path order;
    each one is converted to grayscale and scaled to floats in [0, 1].

    :param folder: path of the folder to scan
    :return: generator of float arrays, one per image
    """
    image_paths = glob(f"{folder}/*.jpg") + glob(f"{folder}/*.jpeg")
    image_paths.sort()
    for image_path in image_paths:
        grayscale = Image.open(image_path).convert("L")
        yield np.asarray(grayscale, dtype=float) / 255.
def load_split(root: str, split_name: str):
    """
    Load one dataset split (e.g. train/val/test) as shuffled arrays.

    Every sub-folder of ``root/split_name`` is treated as one class; classes
    are numbered by the sorted order of their folder paths.

    :param root: dataset root folder
    :param split_name: name of the split sub-folder
    :return: tuple (X, Y) of images and integer labels, shuffled together
    """
    split_dir = f"{root}/{split_name}"
    class_dirs = sorted(
        f"{split_dir}/{entry}"
        for entry in listdir(split_dir)
        if os.path.isdir(f"{split_dir}/{entry}")
    )
    images = []
    labels = []
    for label, class_dir in enumerate(class_dirs):
        class_images = list(load_folder(class_dir))
        images.extend(class_images)
        labels.extend([label] * len(class_images))
    # one permutation applied to both arrays keeps images and labels aligned
    order = np.random.permutation(len(images))
    return np.asarray(images)[order], np.asarray(labels)[order]
def make_data():
    """
    Load the train/val/test splits from the ``DATA`` folder.

    Prints the number of samples per class for each split, then returns the
    images unchanged and the labels one-hot encoded with ``to_categorical``.

    :return: train_x, train_y, val_x, val_y, test_x, test_y
    """
    (train_x, train_y), (val_x, val_y), (test_x, test_y) = (
        load_split("DATA", split) for split in ("train", "val", "test")
    )
    print("train counts", Counter(train_y))
    print("val counts", Counter(val_y))
    print("test counts", Counter(test_y))
    return (
        train_x,
        to_categorical(train_y),
        val_x,
        to_categorical(val_y),
        test_x,
        to_categorical(test_y),
    )
def make_network(num_classes: int):
    """
    Create a PicoMobileNet, configure it, then build and compile it
    :param num_classes: forwarded to the PicoMobileNet constructor
    :return: the network, after build() and compile() have been called on it
    """
    settings = {
        "learning_rate": 0.01,
        "batch_size": 32,
        "verbosity": 1,
        "loss": "categorical_crossentropy",
        "metrics": ["categorical_accuracy"],
        "checkpoint_path": "./checkpoints/pico",
    }
    net = PicoMobileNet(num_classes=num_classes)
    # settings are applied one at a time, in the order listed above
    for option, value in settings.items():
        setattr(net.config, option, value)
    net.build()
    net.compile()
    return net
if __name__ == '__main__':
    # images and one-hot labels of the three splits
    train_x, train_y, val_x, val_y, test_x, test_y = make_data()
    # one network output per one-hot column, so the network always matches the
    # dataset instead of relying on a hard-coded number of classes
    net = make_network(num_classes=train_y.shape[1])
    net.fit(train_x, train_y, val_x, val_y, epochs=30)
    predictions = net.predict(test_x)
    # argmax turns one-hot labels and predicted scores into class indices
    print(classification_report(test_y.argmax(axis=1), predictions.argmax(axis=1)))
    # print the network exported to C++
    print(net.convert.to_cpp())
Architectures
There are different architectures available in the micromobilenet package. They differ in the number of layers, ranging from the "canonical" MobileNet (12 depthwise-separable blocks) down to the extremely lightweight PicoMobileNet, which has only 3 depthwise-separable blocks.
MobileNet
:noscroll:
Total params: 30,040 (117.34 KB)
Trainable params: 30,040 (117.34 KB)
Non-trainable params: 0 (0.00 B)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d_0 (Conv2D) │ (None, 47, 47, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__padding │ (None, 49, 49, 3) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__dw (DepthwiseConv2D) │ (None, 47, 47, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_1 (ReLU) │ (None, 47, 47, 3) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__pw (Conv2D) │ (None, 47, 47, 6) │ 18 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_2 (ReLU) │ (None, 47, 47, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__padding │ (None, 49, 49, 6) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__dw (DepthwiseConv2D) │ (None, 24, 24, 6) │ 54 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_1 (ReLU) │ (None, 24, 24, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__pw (Conv2D) │ (None, 24, 24, 12) │ 72 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_2 (ReLU) │ (None, 24, 24, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__padding │ (None, 26, 26, 12) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__dw (DepthwiseConv2D) │ (None, 24, 24, 12) │ 108 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_1 (ReLU) │ (None, 24, 24, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__pw (Conv2D) │ (None, 24, 24, 12) │ 144 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_2 (ReLU) │ (None, 24, 24, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__padding │ (None, 26, 26, 12) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__dw (DepthwiseConv2D) │ (None, 12, 12, 12) │ 108 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__relu_1 (ReLU) │ (None, 12, 12, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__pw (Conv2D) │ (None, 12, 12, 24) │ 288 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__relu_2 (ReLU) │ (None, 12, 12, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__padding │ (None, 14, 14, 24) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__dw (DepthwiseConv2D) │ (None, 12, 12, 24) │ 216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__relu_1 (ReLU) │ (None, 12, 12, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__pw (Conv2D) │ (None, 12, 12, 24) │ 576 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__relu_2 (ReLU) │ (None, 12, 12, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__padding │ (None, 14, 14, 24) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__dw (DepthwiseConv2D) │ (None, 6, 6, 24) │ 216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__relu_1 (ReLU) │ (None, 6, 6, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__pw (Conv2D) │ (None, 6, 6, 48) │ 1,152 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__relu_2 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__padding │ (None, 8, 8, 48) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__dw (DepthwiseConv2D) │ (None, 6, 6, 48) │ 432 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__relu_1 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__pw (Conv2D) │ (None, 6, 6, 48) │ 2,304 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__relu_2 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__padding │ (None, 8, 8, 48) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__dw (DepthwiseConv2D) │ (None, 6, 6, 48) │ 432 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__relu_1 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__pw (Conv2D) │ (None, 6, 6, 48) │ 2,304 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__relu_2 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__padding │ (None, 8, 8, 48) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__dw (DepthwiseConv2D) │ (None, 6, 6, 48) │ 432 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__relu_1 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__pw (Conv2D) │ (None, 6, 6, 48) │ 2,304 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__relu_2 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_10__padding │ (None, 8, 8, 48) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_10__dw (DepthwiseConv2D) │ (None, 6, 6, 48) │ 432 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_10__relu_1 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_10__pw (Conv2D) │ (None, 6, 6, 48) │ 2,304 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_10__relu_2 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_11__padding │ (None, 8, 8, 48) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_11__dw (DepthwiseConv2D) │ (None, 3, 3, 48) │ 432 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_11__relu_1 (ReLU) │ (None, 3, 3, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_11__pw (Conv2D) │ (None, 3, 3, 96) │ 4,608 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_11__relu_2 (ReLU) │ (None, 3, 3, 96) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_12__padding │ (None, 5, 5, 96) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_12__dw (DepthwiseConv2D) │ (None, 3, 3, 96) │ 864 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_12__relu_1 (ReLU) │ (None, 3, 3, 96) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_12__pw (Conv2D) │ (None, 3, 3, 96) │ 9,216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_12__relu_2 (ReLU) │ (None, 3, 3, 96) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ maxpool_last (MaxPool2D) │ (None, 1, 1, 96) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout) │ (None, 1, 1, 96) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_last (Conv2D) │ (None, 1, 1, 10) │ 970 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ reshape (Reshape) │ (None, 10) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ softmax (Softmax) │ (None, 10) │ 0 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
MilliMobileNet
:noscroll:
Total params: 11,704 (45.72 KB)
Trainable params: 11,704 (45.72 KB)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d_0 (Conv2D) │ (None, 47, 47, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__padding │ (None, 49, 49, 3) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__dw (DepthwiseConv2D) │ (None, 47, 47, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_1 (ReLU) │ (None, 47, 47, 3) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__pw (Conv2D) │ (None, 47, 47, 6) │ 18 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_2 (ReLU) │ (None, 47, 47, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__padding │ (None, 49, 49, 6) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__dw (DepthwiseConv2D) │ (None, 24, 24, 6) │ 54 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_1 (ReLU) │ (None, 24, 24, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__pw (Conv2D) │ (None, 24, 24, 12) │ 72 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_2 (ReLU) │ (None, 24, 24, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__padding │ (None, 26, 26, 12) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__dw (DepthwiseConv2D) │ (None, 24, 24, 12) │ 108 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_1 (ReLU) │ (None, 24, 24, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__pw (Conv2D) │ (None, 24, 24, 12) │ 144 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_2 (ReLU) │ (None, 24, 24, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__padding │ (None, 26, 26, 12) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__dw (DepthwiseConv2D) │ (None, 12, 12, 12) │ 108 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__relu_1 (ReLU) │ (None, 12, 12, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__pw (Conv2D) │ (None, 12, 12, 24) │ 288 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__relu_2 (ReLU) │ (None, 12, 12, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__padding │ (None, 14, 14, 24) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__dw (DepthwiseConv2D) │ (None, 12, 12, 24) │ 216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__relu_1 (ReLU) │ (None, 12, 12, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__pw (Conv2D) │ (None, 12, 12, 24) │ 576 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__relu_2 (ReLU) │ (None, 12, 12, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__padding │ (None, 14, 14, 24) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__dw (DepthwiseConv2D) │ (None, 6, 6, 24) │ 216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__relu_1 (ReLU) │ (None, 6, 6, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__pw (Conv2D) │ (None, 6, 6, 48) │ 1,152 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__relu_2 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__padding │ (None, 8, 8, 48) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__dw (DepthwiseConv2D) │ (None, 6, 6, 48) │ 432 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__relu_1 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__pw (Conv2D) │ (None, 6, 6, 48) │ 2,304 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__relu_2 (ReLU) │ (None, 6, 6, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__padding │ (None, 8, 8, 48) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__dw (DepthwiseConv2D) │ (None, 3, 3, 48) │ 432 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__relu_1 (ReLU) │ (None, 3, 3, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__pw (Conv2D) │ (None, 3, 3, 48) │ 2,304 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__relu_2 (ReLU) │ (None, 3, 3, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__padding │ (None, 5, 5, 48) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__dw (DepthwiseConv2D) │ (None, 3, 3, 48) │ 432 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__relu_1 (ReLU) │ (None, 3, 3, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__pw (Conv2D) │ (None, 3, 3, 48) │ 2,304 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_9__relu_2 (ReLU) │ (None, 3, 3, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ maxpool_last (MaxPool2D) │ (None, 1, 1, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout) │ (None, 1, 1, 48) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_last (Conv2D) │ (None, 1, 1, 10) │ 490 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ reshape (Reshape) │ (None, 10) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ softmax (Softmax) │ (None, 10) │ 0 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
MicroMobileNet
:noscroll:
Total params: 4,264 (16.66 KB)
Trainable params: 4,264 (16.66 KB)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d_0 (Conv2D) │ (None, 47, 47, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__padding │ (None, 49, 49, 3) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__dw (DepthwiseConv2D) │ (None, 47, 47, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_1 (ReLU) │ (None, 47, 47, 3) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__pw (Conv2D) │ (None, 47, 47, 6) │ 18 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_2 (ReLU) │ (None, 47, 47, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__padding │ (None, 49, 49, 6) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__dw (DepthwiseConv2D) │ (None, 24, 24, 6) │ 54 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_1 (ReLU) │ (None, 24, 24, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__pw (Conv2D) │ (None, 24, 24, 12) │ 72 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_2 (ReLU) │ (None, 24, 24, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__padding │ (None, 26, 26, 12) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__dw (DepthwiseConv2D) │ (None, 24, 24, 12) │ 108 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_1 (ReLU) │ (None, 24, 24, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__pw (Conv2D) │ (None, 24, 24, 12) │ 144 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_2 (ReLU) │ (None, 24, 24, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__padding │ (None, 26, 26, 12) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__dw (DepthwiseConv2D) │ (None, 12, 12, 12) │ 108 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__relu_1 (ReLU) │ (None, 12, 12, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__pw (Conv2D) │ (None, 12, 12, 24) │ 288 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__relu_2 (ReLU) │ (None, 12, 12, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__padding │ (None, 14, 14, 24) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__dw (DepthwiseConv2D) │ (None, 12, 12, 24) │ 216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__relu_1 (ReLU) │ (None, 12, 12, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__pw (Conv2D) │ (None, 12, 12, 24) │ 576 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_5__relu_2 (ReLU) │ (None, 12, 12, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__padding │ (None, 14, 14, 24) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__dw (DepthwiseConv2D) │ (None, 6, 6, 24) │ 216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__relu_1 (ReLU) │ (None, 6, 6, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__pw (Conv2D) │ (None, 6, 6, 24) │ 576 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_6__relu_2 (ReLU) │ (None, 6, 6, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__padding │ (None, 8, 8, 24) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__dw (DepthwiseConv2D) │ (None, 6, 6, 24) │ 216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__relu_1 (ReLU) │ (None, 6, 6, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__pw (Conv2D) │ (None, 6, 6, 24) │ 576 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_7__relu_2 (ReLU) │ (None, 6, 6, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__padding │ (None, 8, 8, 24) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__dw (DepthwiseConv2D) │ (None, 3, 3, 24) │ 216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__relu_1 (ReLU) │ (None, 3, 3, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__pw (Conv2D) │ (None, 3, 3, 24) │ 576 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_8__relu_2 (ReLU) │ (None, 3, 3, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ maxpool_last (MaxPool2D) │ (None, 1, 1, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout) │ (None, 1, 1, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_last (Conv2D) │ (None, 1, 1, 10) │ 250 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ reshape (Reshape) │ (None, 10) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ softmax (Softmax) │ (None, 10) │ 0 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
NanoMobileNet
:noscroll:
Total params: 1,636 (6.39 KB)
Trainable params: 1,636 (6.39 KB)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d_0 (Conv2D) │ (None, 47, 47, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__padding │ (None, 49, 49, 3) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__dw (DepthwiseConv2D) │ (None, 24, 24, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_1 (ReLU) │ (None, 24, 24, 3) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__pw (Conv2D) │ (None, 24, 24, 6) │ 18 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_2 (ReLU) │ (None, 24, 24, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__padding │ (None, 26, 26, 6) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__dw (DepthwiseConv2D) │ (None, 12, 12, 6) │ 54 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_1 (ReLU) │ (None, 12, 12, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__pw (Conv2D) │ (None, 12, 12, 12) │ 72 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_2 (ReLU) │ (None, 12, 12, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__padding │ (None, 14, 14, 12) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__dw (DepthwiseConv2D) │ (None, 6, 6, 12) │ 108 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_1 (ReLU) │ (None, 6, 6, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__pw (Conv2D) │ (None, 6, 6, 24) │ 288 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_2 (ReLU) │ (None, 6, 6, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__padding │ (None, 8, 8, 24) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__dw (DepthwiseConv2D) │ (None, 3, 3, 24) │ 216 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__relu_1 (ReLU) │ (None, 3, 3, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__pw (Conv2D) │ (None, 3, 3, 24) │ 576 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_4__relu_2 (ReLU) │ (None, 3, 3, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ maxpool_last (MaxPool2D) │ (None, 1, 1, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout) │ (None, 1, 1, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_last (Conv2D) │ (None, 1, 1, 10) │ 250 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ reshape (Reshape) │ (None, 10) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ softmax (Softmax) │ (None, 10) │ 0 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
PicoMobileNet
:noscroll:
Total params: 844 (3.30 KB)
Trainable params: 844 (3.30 KB)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type) ┃ Output Shape ┃ Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d_0 (Conv2D) │ (None, 32, 32, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__padding │ (None, 34, 34, 3) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__dw (DepthwiseConv2D) │ (None, 16, 16, 3) │ 27 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_1 (ReLU) │ (None, 16, 16, 3) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__pw (Conv2D) │ (None, 16, 16, 6) │ 18 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_1__relu_2 (ReLU) │ (None, 16, 16, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__padding │ (None, 18, 18, 6) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__dw (DepthwiseConv2D) │ (None, 8, 8, 6) │ 54 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_1 (ReLU) │ (None, 8, 8, 6) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__pw (Conv2D) │ (None, 8, 8, 12) │ 72 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_2__relu_2 (ReLU) │ (None, 8, 8, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__padding │ (None, 10, 10, 12) │ 0 │
│ (ZeroPadding2D) │ │ │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__dw (DepthwiseConv2D) │ (None, 4, 4, 12) │ 108 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_1 (ReLU) │ (None, 4, 4, 12) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__pw (Conv2D) │ (None, 4, 4, 24) │ 288 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ hidden_3__relu_2 (ReLU) │ (None, 4, 4, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ maxpool_last (MaxPool2D) │ (None, 1, 1, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout) │ (None, 1, 1, 24) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_last (Conv2D) │ (None, 1, 1, 10) │ 250 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ reshape (Reshape) │ (None, 10) │ 0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ softmax (Softmax) │ (None, 10) │ 0 │
└─────────────────────────────────┴────────────────────────┴───────────────┘