1
1
/*
2
- * Image recognition using Google's Inception network
2
+ * Image recognition using Google's Inception v3 network
3
3
* based on https://www.tensorflow.org/versions/master/tutorials/image_recognition/index.html
4
4
*
5
- *
6
5
* Uses pre-trained model https://storage.googleapis.com/download.tensorflow.org/models/inception_dec_2015.zip
7
6
*
8
7
* openFrameworks code loads and processes pre-trained model (i.e. makes calculations/predictions)
15
14
#include " ofxMSATensorFlow.h"
16
15
17
16
18
- // input image dimensions dictated by trained model
19
- #define kInputWidth 299
20
- #define kInputHeight 299
21
- #define kInputSize (kInputWidth * kInputHeight )
22
-
23
-
24
- // we need to normalize the images before feeding into the network
25
- // from each pixel we subtract the mean and divide by variance
26
- // this is also dictated by the trained model
27
- #define kInputMean (128 .0f /255 .0f )
28
- #define kInputStd (128 .0f /255 .0f )
29
-
30
- // model & labels files to load
31
- #define kModelPath " models/tensorflow_inception_graph.pb"
32
- #define kLabelsPath " models/imagenet_comp_graph_label_strings.txt"
33
-
34
-
35
- // every node in the network has a name
36
- // when passing in data to the network, or reading data back, we need to refer to the node by name
37
- // i.e. 'pass this data to node A', or 'read data back from node X'
38
- // these node names are specific to the architecture of the model
39
- #define kInputLayer " Mul"
40
- #define kOutputLayer " softmax"
41
-
42
-
43
-
44
17
// --------------------------------------------------------------
45
18
// ofImage::load() (ie. Freeimage load) doesn't work with TensorFlow! (See README.md)
46
19
// so I have to resort to this awful trick of loading raw image data 299x299 RGB
47
- static void loadImageRaw (string path, ofImage &img) {
20
+ static void loadImageRaw (string path, ofImage &img, int w, int h ) {
48
21
ofFile file (path);
49
- img.setFromPixels ((unsigned char *)file.readToBuffer ().getData (), kInputWidth , kInputHeight , OF_IMAGE_COLOR);
50
- }
51
-
52
-
53
-
54
- // --------------------------------------------------------------
55
- // Takes a file name, and loads a list of labels from it, one per line, and
56
- // returns a vector of the strings. It pads with empty strings so the length
57
- // of the result is a multiple of 16, because our model expects that.
58
- static bool ReadLabelsFile (string file_name, std::vector<string>* result) {
59
- std::ifstream file (file_name);
60
- if (!file) {
61
- ofLogError () <<" ReadLabelsFile: " << file_name << " not found." ;
62
- return false ;
63
- }
64
-
65
- result->clear ();
66
- string line;
67
- while (std::getline (file, line)) {
68
- result->push_back (line);
69
- }
70
- const int padding = 16 ;
71
- while (result->size () % padding) {
72
- result->emplace_back ();
73
- }
74
- return true ;
22
+ img.setFromPixels ((unsigned char *)file.readToBuffer ().getData (), w, h, OF_IMAGE_COLOR);
75
23
}
76
24
77
25
78
26
79
- class ofApp : public ofBaseApp {
27
+ class ofApp : public ofBaseApp {
80
28
public:
81
29
82
- // main interface to everything tensorflow
83
- ofxMSATensorFlow msa_tf;
84
-
85
- // Tensor to hold input image which is fed into the network
86
- tensorflow::Tensor image_tensor;
87
-
88
- // vector of Tensors to hold data coming back from the network
89
- // (it's a vector of Tensors, because that's how the API works)
90
- vector<tensorflow::Tensor> output_tensors;
30
+ // classifies pixels
31
+ // check the src of this class (ofxMSATFImageClassifier) to see how to do more generic stuff with ofxMSATensorFlow
32
+ msa::tf::ImageClassifier classifier;
91
33
92
34
// for webcam input
93
35
shared_ptr<ofVideoGrabber> video_grabber;
94
36
95
- // contains input image to classify
96
- ofImage input_image;
97
-
98
- // normalized float version of input image
99
- // keeping texture separate so it's not unnessecarily updated when it isn't needed
100
- ofFloatPixels processed_pix;
101
- ofTexture processed_tex;
102
-
103
- // contains all labels
104
- vector<string> labels;
105
-
106
37
// folder of images to classify
107
38
ofDirectory image_dir;
108
39
109
- // contains classification information from last classification attempt
110
- vector<int > top_label_indices;
111
- vector<float > top_scores;
112
-
113
- // ---------------------------------------------------------
114
- // Load pixels into the network, get the results
115
- void classify (ofPixels &pix) {
116
- // convert from unsigned char pix to float pix
117
- processed_pix = pix;
118
-
119
- // need to resize image to specific dimensions the model is expecting
120
- processed_pix.resize (kInputWidth , kInputHeight );
121
-
122
- // pixelwise normalize image by subtracting the mean and dividing by variance (across entire dataset)
123
- // I could do this without iterating over the pixels, by setting up a TensorFlow Graph, but I can't be bothered, this is less code
124
- float * pix_data = processed_pix.getData ();
125
- if (!pix_data) {
126
- ofLogError () << " Could not classify. pixel data is NULL" ;
127
- return ;
128
- }
129
- for (int i=0 ; i<kInputSize *3 ; i++) pix_data[i] = (pix_data[i] - kInputMean ) / kInputStd ;
130
-
131
- // make sure opengl texture is updated with new pixel info (needed for correct rendering)
132
- processed_tex.loadData (processed_pix);
133
-
134
- // copy data from image into tensorflow's Tensor class
135
- ofxMSATensorFlow::pixelsToTensor (processed_pix, image_tensor);
136
-
137
- // feed the data into the network, and request output
138
- // output_tensors don't need to be initialized or allocated. they will be filled once the network runs
139
- if ( !msa_tf.run ({ {kInputLayer , image_tensor } }, { kOutputLayer }, {}, &output_tensors) ) {
140
- ofLogError () << " Error during running. Check console for details." << endl;
141
- return ;
142
- }
143
-
144
- // the output from the network above is an array of probabilities for every single label
145
- // i.e. thousands of probabilities, we only want to the top few
146
- ofxMSATensorFlow::getTopScores (output_tensors[0 ], 6 , top_label_indices, top_scores);
147
- }
148
-
40
+ // top scoring classes
41
+ vector<int > top_label_indices; // contains top n label indices for input image
42
+ vector<float > top_class_probs; // contains top n probabilities for current input image
149
43
150
44
151
45
// --------------------------------------------------------------
152
46
void loadNextImage () {
153
47
static int file_index = 0 ;
48
+ ofImage img;
154
49
155
50
// System load dialog doesn't work with tensorflow :(
156
51
// auto o = ofSystemLoadDialog("Select image");
@@ -160,35 +55,48 @@ class ofApp : public ofBaseApp{
160
55
// img.load("images/fanboy.jpg");
161
56
162
57
// resorting to awful raw data file load hack!
163
- loadImageRaw (image_dir.getPath (file_index), input_image);
164
- classify (input_image.getPixels ());
58
+ loadImageRaw (image_dir.getPath (file_index), img, 299 , 299 );
59
+
60
+ classify (img.getPixels ());
165
61
file_index = (file_index+1 ) % image_dir.getFiles ().size ();
166
62
}
167
63
168
64
65
+ // --------------------------------------------------------------
66
+ void classify (const ofPixels& pix) {
67
+ // classify pixels
68
+ classifier.classify (pix);
69
+
70
+ msa::tf::getTopScores (classifier.getOutputTensors ()[0 ], 6 , top_label_indices, top_class_probs);
71
+ }
72
+
169
73
// --------------------------------------------------------------
170
74
void setup (){
171
75
ofLogNotice () << " Initializing... " ;
172
76
ofBackground (0 );
173
77
ofSetVerticalSync (true );
174
- ofSetFrameRate (60 );
78
+ // ofSetFrameRate(60);
79
+
80
+ // initialize the image classifier, lots of params to setup
81
+ // these settings are specific to the model
82
+ msa::tf::ImageClassifier::Settings settings;
83
+ settings.image_dims = { 299 , 299 , 3 };
84
+ settings.itensor_dims = { 1 , 299 , 299 , 3 };
85
+ settings.model_path = " models/tensorflow_inception_graph.pb" ;
86
+ settings.labels_path = " models/imagenet_comp_graph_label_strings.txt" ;
87
+ settings.input_layer_name = " Mul" ;
88
+ settings.output_layer_name = " softmax" ;
89
+ settings.dropout_layer_name = " " ;
90
+ settings.varconst_layer_suffix = " _VARHACK" ;
91
+ settings.norm_mean = 128 .0f /255 .0f ;
92
+ settings.norm_stddev = 128 .0f /255 .0f ;
93
+
94
+ // initialize classifier with these settings
95
+ classifier.setup (settings);
175
96
176
97
// get a list of all images in the 'images' folder
177
98
image_dir.listDir (" images" );
178
99
179
- // Initialize tensorflow session, return if error
180
- if ( !msa_tf.setup () ) return ;
181
-
182
- // Load graph (i.e. trained model) add to session, return if error
183
- if ( !msa_tf.loadGraph (kModelPath ) ) return ;
184
-
185
- // load text file containing labels (i.e. associating classification index with human readable text)
186
- if ( !ReadLabelsFile (ofToDataPath (kLabelsPath ), &labels) ) return ;
187
-
188
- // initialize input tensor dimensions
189
- // (not sure what the best way to do this was as there isn't an 'init' method, just a constructor)
190
- image_tensor = tensorflow::Tensor (tensorflow::DT_FLOAT, tensorflow::TensorShape ({ 1 , kInputHeight , kInputWidth , 3 }));
191
-
192
100
// load first image to classify
193
101
loadNextImage ();
194
102
@@ -197,66 +105,69 @@ class ofApp : public ofBaseApp{
197
105
198
106
199
107
// --------------------------------------------------------------
200
- void update (){
201
-
108
+ void update () {
202
109
// if video_grabber active,
203
110
if (video_grabber) {
204
111
// grab frame
205
112
video_grabber->update ();
206
113
207
114
if (video_grabber->isFrameNew ()) {
208
-
209
- // update input_image so it's drawn in the right place
210
- input_image.setFromPixels (video_grabber->getPixels ());
211
-
212
115
// send to classification if keypressed
213
- if (ofGetKeyPressed (' ' )) classify (input_image.getPixels ());
116
+ if (ofGetKeyPressed (' ' ))
117
+ classify (video_grabber->getPixels ());
214
118
}
215
119
}
216
120
}
217
121
122
+
218
123
// --------------------------------------------------------------
219
- void draw (){
220
- // draw input image if it's available
221
- float x = 0 ;
222
- if (input_image.isAllocated ()) {
223
- input_image.draw (x, 0 );
224
- x += input_image.getWidth ();
225
- }
124
+ void draw () {
125
+ if (classifier.isReady ()) {
126
+ ofSetColor (255 );
226
127
227
- // draw processed image if it's available
228
- if (processed_tex.isAllocated ()) {
229
- processed_tex.draw (x, 0 );
230
- x += processed_tex.getWidth ();
231
- }
128
+ // if video grabber active, draw in bottom left corner
129
+ if (video_grabber) video_grabber->draw (0 , ofGetHeight () - 240 , 320 , 240 );
232
130
233
- x += 20 ;
234
- float w = ofGetWidth () - 400 - x;
235
- float y = 40 ;
236
- float bar_height = 35 ;
237
131
238
- // iterate top scores and draw them
239
- for (int i=0 ; i<top_scores.size (); i++) {
240
- int label_index = top_label_indices[i];
241
- string label = labels[label_index];
242
- float p = top_scores[i]; // the score (i.e. probability, 0...1)
132
+ float x = 0 ;
243
133
244
- // draw full bar
245
- ofSetColor (ofLerp (50.0 , 255.0 , p), ofLerp (100.0 , 0.0 , p), ofLerp (150.0 , 0.0 , p));
246
- ofDrawRectangle (x, y, w * p, bar_height);
247
- ofSetColor (40 );
134
+ // draw input image
135
+ classifier.getInputImage ().draw (x, 0 );
136
+ x += classifier.getInputImage ().getWidth ();
248
137
249
- // draw outline
250
- ofNoFill ();
251
- ofDrawRectangle (x, y, w, bar_height);
252
- ofFill ();
138
+ // draw processed image
139
+ classifier.getProcessedImage ().draw (x, 0 );
140
+ x += classifier.getProcessedImage ().getWidth ();
253
141
254
- // draw text
255
- ofSetColor (255 );
256
- ofDrawBitmapString (label + " (" + ofToString (label_index) + " ): " + ofToString (p,4 ), x + w + 10 , y + 20 );
257
- y += bar_height + 5 ;
258
- }
142
+ x += 20 ;
143
+
144
+ float w = ofGetWidth () - 400 - x;
145
+ float y = 40 ;
146
+ float bar_height = 35 ;
147
+
148
+
149
+ // iterate top scores and draw them
150
+ for (int i=0 ; i<top_class_probs.size (); i++) {
151
+ int label_index = top_label_indices[i];
152
+ string label = classifier.getLabels ()[label_index];
153
+ float p = top_class_probs[i]; // the score (i.e. probability, 0...1)
259
154
155
+ // draw full bar
156
+ ofSetColor (ofLerp (50.0 , 255.0 , p), ofLerp (100.0 , 0.0 , p), ofLerp (150.0 , 0.0 , p));
157
+ ofDrawRectangle (x, y, w * p, bar_height);
158
+ ofSetColor (40 );
159
+
160
+ // draw outline
161
+ ofNoFill ();
162
+ ofDrawRectangle (x, y, w, bar_height);
163
+ ofFill ();
164
+
165
+ // draw text
166
+ ofSetColor (255 );
167
+ ofDrawBitmapString (label + " (" + ofToString (label_index) + " ): " + ofToString (p,4 ), x + w + 10 , y + 20 );
168
+ y += bar_height + 5 ;
169
+ }
170
+ }
260
171
261
172
ofSetColor (255 );
262
173
ofDrawBitmapString (ofToString (ofGetFrameRate ()), ofGetWidth () - 100 , 30 );
@@ -265,7 +176,7 @@ class ofApp : public ofBaseApp{
265
176
str_inst << " 'l' to load image\n " ;
266
177
str_inst << " or drag an image (must be raw, 299x299) onto the window\n " ;
267
178
str_inst << " 'v' to toggle video input" ;
268
- ofDrawBitmapString (str_inst.str (), 15 , input_image .getHeight () + 30 );
179
+ ofDrawBitmapString (str_inst.str (), 15 , classifier .getHeight () + 30 );
269
180
}
270
181
271
182
@@ -291,14 +202,18 @@ class ofApp : public ofBaseApp{
291
202
// Drag-and-drop handler: classifies the first file dropped onto the window.
// The file is read as raw 299x299 data; any additional dropped files are ignored.
void dragEvent(ofDragInfo dragInfo) {
    // nothing was dropped: nothing to classify
    if (dragInfo.files.empty()) {
        return;
    }

    const string droppedPath = dragInfo.files.front();

    // img.load(droppedPath) is not used: FreeImage doesn't work here :(
    ofImage rawImage;
    loadImageRaw(droppedPath, rawImage, 299, 299);

    classify(rawImage.getPixels());
}
299
212
300
213
};
301
214
215
+
216
+
302
217
// ========================================================================
303
218
int main ( ){
304
219
ofSetupOpenGL (1200 , 800 , OF_WINDOW); // <-------- setup the GL context
0 commit comments