Skip to content

Commit 54bb334

Browse files
committed
big optimization on inception image classification (not using ofImage, but ofPixels + ofTexture, so unnecessary texture ops are avoided)
1 parent 175d7e6 commit 54bb334

File tree

5 files changed

+312
-328
lines changed

5 files changed

+312
-328
lines changed

example-inception3/example-inception3.qbs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@ Project{
1212
name: { return FileInfo.baseName(path) }
1313

1414
files: [
15-
"src/main.cpp",
16-
"src/ofApp.cpp",
17-
"src/ofApp.h",
15+
"src/*",
1816
]
1917

2018
of.addons: [
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
/*
2+
* Image recognition using Google's Inception network
3+
* based on https://www.tensorflow.org/versions/master/tutorials/image_recognition/index.html
4+
*
5+
*
6+
* Uses pre-trained model https://storage.googleapis.com/download.tensorflow.org/models/inception_dec_2015.zip
7+
*
8+
* openFrameworks code loads and processes pre-trained model (i.e. makes calculations/predictions)
9+
*
10+
*/
11+
12+
13+
14+
#include "ofMain.h"
15+
#include "ofxMSATensorFlow.h"
16+
17+
18+
// input image dimensions dictated by trained model
19+
#define kInputWidth 299
20+
#define kInputHeight 299
21+
#define kInputSize (kInputWidth * kInputHeight)
22+
23+
24+
// we need to normalize the images before feeding into the network
25+
// from each pixel we subtract the mean and divide by the standard deviation
26+
// this is also dictated by the trained model
27+
#define kInputMean (128.0f/255.0f)
28+
#define kInputStd (128.0f/255.0f)
29+
30+
// model & labels files to load
31+
#define kModelPath "models/tensorflow_inception_graph.pb"
32+
#define kLabelsPath "models/imagenet_comp_graph_label_strings.txt"
33+
34+
35+
// every node in the network has a name
36+
// when passing in data to the network, or reading data back, we need to refer to the node by name
37+
// i.e. 'pass this data to node A', or 'read data back from node X'
38+
// these node names are specific to the architecture of the model
39+
#define kInputLayer "Mul"
40+
#define kOutputLayer "softmax"
41+
42+
43+
44+
//--------------------------------------------------------------
45+
// ofImage::load() (ie. Freeimage load) doesn't work with TensorFlow! (See README.md)
46+
// so I have to resort to this awful trick of loading raw image data 299x299 RGB
47+
void loadImageRaw(string path, ofImage &img) {
48+
ofFile file(path);
49+
img.setFromPixels((unsigned char*)file.readToBuffer().getData(), kInputWidth, kInputHeight, OF_IMAGE_COLOR);
50+
}
51+
52+
53+
54+
//--------------------------------------------------------------
55+
// Takes a file name, and loads a list of labels from it, one per line, and
56+
// returns a vector of the strings. It pads with empty strings so the length
57+
// of the result is a multiple of 16, because our model expects that.
58+
bool ReadLabelsFile(string file_name, std::vector<string>* result) {
59+
std::ifstream file(file_name);
60+
if (!file) {
61+
ofLogError() <<"ReadLabelsFile: " << file_name << " not found.";
62+
return false;
63+
}
64+
65+
result->clear();
66+
string line;
67+
while (std::getline(file, line)) {
68+
result->push_back(line);
69+
}
70+
const int padding = 16;
71+
while (result->size() % padding) {
72+
result->emplace_back();
73+
}
74+
return true;
75+
}
76+
77+
78+
79+
class ofApp : public ofBaseApp{
80+
public:
81+
82+
// main interface to everything tensorflow
83+
ofxMSATensorFlow msa_tf;
84+
85+
// Tensor to hold input image which is fed into the network
86+
tensorflow::Tensor image_tensor;
87+
88+
// vector of Tensors to hold data coming back from the network
89+
// (it's a vector of Tensors, because that's how the API works)
90+
vector<tensorflow::Tensor> output_tensors;
91+
92+
// for webcam input
93+
shared_ptr<ofVideoGrabber> video_grabber;
94+
95+
// contains input image to classify
96+
ofImage input_image;
97+
98+
// normalized float version of input image
99+
// keeping texture separate so it's not unnessecarily updated when it isn't needed
100+
ofFloatPixels processed_pix;
101+
ofTexture processed_tex;
102+
103+
// contains all labels
104+
vector<string> labels;
105+
106+
// folder of images to classify
107+
ofDirectory image_dir;
108+
109+
// contains classification information from last classification attempt
110+
vector<int> top_label_indices;
111+
vector<float> top_scores;
112+
113+
//---------------------------------------------------------
114+
// Load pixels into the network, get the results
115+
void classify(ofPixels &pix) {
116+
// convert from unsigned char pix to float pix
117+
processed_pix = pix;
118+
119+
// need to resize image to specific dimensions the model is expecting
120+
processed_pix.resize(kInputWidth, kInputHeight);
121+
122+
// pixelwise normalize image by subtracting the mean and dividing by variance (across entire dataset)
123+
// I could do this without iterating over the pixels, by setting up a TensorFlow Graph, but I can't be bothered, this is less code
124+
float* pix_data = processed_pix.getData();
125+
if(!pix_data) {
126+
ofLogError() << "Could not classify. pixel data is NULL";
127+
return;
128+
}
129+
for(int i=0; i<kInputSize*3; i++) pix_data[i] = (pix_data[i] - kInputMean) / kInputStd;
130+
131+
// make sure opengl texture is updated with new pixel info (needed for correct rendering)
132+
processed_tex.loadData(processed_pix);
133+
134+
// copy data from image into tensorflow's Tensor class
135+
ofxMSATensorFlow::pixelsToTensor(processed_pix, image_tensor);
136+
137+
// feed the data into the network, and request output
138+
// output_tensors don't need to be initialized or allocated. they will be filled once the network runs
139+
if( !msa_tf.run({ {kInputLayer, image_tensor } }, { kOutputLayer }, {}, &output_tensors) ) {
140+
ofLogError() << "Error during running. Check console for details." << endl;
141+
return;
142+
}
143+
144+
// the output from the network above is an array of probabilities for every single label
145+
// i.e. thousands of probabilities, we only want to the top few
146+
ofxMSATensorFlow::getTopScores(output_tensors[0], 6, top_label_indices, top_scores);
147+
}
148+
149+
150+
151+
//--------------------------------------------------------------
152+
void loadNextImage() {
153+
static int file_index = 0;
154+
155+
// System load dialog doesn't work with tensorflow :(
156+
//auto o = ofSystemLoadDialog("Select image");
157+
//if(!o.bSuccess) return;
158+
159+
// FreeImage doesn't work with tensorflow! :(
160+
//img.load("images/fanboy.jpg");
161+
162+
// resorting to awful raw data file load hack!
163+
loadImageRaw(image_dir.getPath(file_index), input_image);
164+
classify(input_image.getPixels());
165+
file_index = (file_index+1) % image_dir.getFiles().size();
166+
}
167+
168+
169+
//--------------------------------------------------------------
170+
void setup(){
171+
ofLogNotice() << "Initializing... ";
172+
ofBackground(0);
173+
ofSetVerticalSync(true);
174+
ofSetFrameRate(60);
175+
176+
// get a list of all images in the 'images' folder
177+
image_dir.listDir("images");
178+
179+
// Initialize tensorflow session, return if error
180+
if( !msa_tf.setup() ) return;
181+
182+
// Load graph (i.e. trained model) add to session, return if error
183+
if( !msa_tf.loadGraph(kModelPath) ) return;
184+
185+
// load text file containing labels (i.e. associating classification index with human readable text)
186+
if( !ReadLabelsFile(ofToDataPath(kLabelsPath), &labels) ) return;
187+
188+
// initialize input tensor dimensions
189+
// (not sure what the best way to do this was as there isn't an 'init' method, just a constructor)
190+
image_tensor = tensorflow::Tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({ 1, kInputHeight, kInputWidth, 3 }));
191+
192+
// load first image to classify
193+
loadNextImage();
194+
195+
ofLogNotice() << "Init successfull";
196+
}
197+
198+
199+
//--------------------------------------------------------------
200+
void update(){
201+
202+
// if video_grabber active,
203+
if(video_grabber) {
204+
// grab frame
205+
video_grabber->update();
206+
207+
if(video_grabber->isFrameNew()) {
208+
209+
// update input_image so it's drawn in the right place
210+
input_image.setFromPixels(video_grabber->getPixels());
211+
212+
// send to classification if keypressed
213+
if(ofGetKeyPressed(' ')) classify(input_image.getPixels());
214+
}
215+
}
216+
}
217+
218+
//--------------------------------------------------------------
219+
void draw(){
220+
// draw input image if it's available
221+
float x = 0;
222+
if(input_image.isAllocated()) {
223+
input_image.draw(x, 0);
224+
x += input_image.getWidth();
225+
}
226+
227+
// draw processed image if it's available
228+
if(processed_tex.isAllocated()) {
229+
processed_tex.draw(x, 0);
230+
x += processed_tex.getWidth();
231+
}
232+
233+
x += 20;
234+
float w = ofGetWidth() - 400 - x;
235+
float y = 40;
236+
float bar_height = 35;
237+
238+
// iterate top scores and draw them
239+
for(int i=0; i<top_scores.size(); i++) {
240+
int label_index = top_label_indices[i];
241+
string label = labels[label_index];
242+
float p = top_scores[i]; // the score (i.e. probability, 0...1)
243+
244+
// draw full bar
245+
ofSetColor(ofLerp(50.0, 255.0, p), ofLerp(100.0, 0.0, p), ofLerp(150.0, 0.0, p));
246+
ofDrawRectangle(x, y, w * p, bar_height);
247+
ofSetColor(40);
248+
249+
// draw outline
250+
ofNoFill();
251+
ofDrawRectangle(x, y, w, bar_height);
252+
ofFill();
253+
254+
// draw text
255+
ofSetColor(255);
256+
ofDrawBitmapString(label + " (" + ofToString(label_index) + "): " + ofToString(p,4), x + w + 10, y + 20);
257+
y += bar_height + 5;
258+
}
259+
260+
261+
ofSetColor(255);
262+
ofDrawBitmapString(ofToString(ofGetFrameRate()), ofGetWidth() - 100, 30);
263+
264+
stringstream str_inst;
265+
str_inst << "'l' to load image\n";
266+
str_inst << "or drag an image (must be raw, 299x299) onto the window\n";
267+
str_inst << "'v' to toggle video input";
268+
ofDrawBitmapString(str_inst.str(), 15, input_image.getHeight() + 30);
269+
}
270+
271+
272+
//--------------------------------------------------------------
273+
void keyPressed(int key){
274+
switch(key) {
275+
276+
case 'v':
277+
if(video_grabber) video_grabber = NULL;
278+
else {
279+
video_grabber = make_shared<ofVideoGrabber>();
280+
video_grabber->setup(320, 240);
281+
}
282+
break;
283+
284+
case 'l':
285+
loadNextImage();
286+
break;
287+
}
288+
}
289+
290+
//--------------------------------------------------------------
291+
void dragEvent(ofDragInfo dragInfo){
292+
if(dragInfo.files.empty()) return;
293+
294+
string filePath = dragInfo.files[0];
295+
//img.load(filePath); // FreeImage doesn't work :(
296+
loadImageRaw(filePath, input_image);
297+
classify(input_image.getPixels());
298+
}
299+
300+
};
301+
302+
//========================================================================
303+
int main( ){
    // set up the window together with its GL context:
    // pass in width and height, the mode can be OF_WINDOW or OF_FULLSCREEN
    const int window_width = 1200;
    const int window_height = 800;
    ofSetupOpenGL(window_width, window_height, OF_WINDOW);

    // this kicks off the running of the app
    ofRunApp(new ofApp());

}

example-inception3/src/main.cpp

Lines changed: 0 additions & 13 deletions
This file was deleted.

0 commit comments

Comments
 (0)