// GIEFeatExtractor.cpp (himrep)

#include <opencv2/imgproc/types_c.h>
#include <opencv2/imgproc.hpp>
#include "GIEFeatExtractor.h"

#include <cstdlib>   // malloc, free
#include <cstring>   // memset, memcpy
#include <iostream>  // std::cout
#include <sstream>

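// NB: zero-copy (host-mapped) allocations assume the CUDA device supports host
// memory mapping; init() checks cudaDeviceProp::canMapHostMemory before any
// allocation is made. The cudaSetDeviceFlags(cudaDeviceMapHost) call is left
// commented out below, presumably because the flag is set elsewhere.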
// Allocate ZeroCopy mapped memory, shared between CUDA and CPU.
bool GIEFeatExtractor::cudaAllocMapped( void** cpuPtr, void** gpuPtr, size_t size )
{
    if( !cpuPtr || !gpuPtr || size == 0 )
        return false;

    //CUDA(cudaSetDeviceFlags(cudaDeviceMapHost));

    if( CUDA_FAILED(cudaHostAlloc(cpuPtr, size, cudaHostAllocMapped)) )
        return false;

    if( CUDA_FAILED(cudaHostGetDevicePointer(gpuPtr, *cpuPtr, 0)) )
        return false;

    memset(*cpuPtr, 0, size);
    std::cout << "cudaAllocMapped : " << size << " bytes" << std::endl;
    return true;
}

bool GIEFeatExtractor::cudaFreeMapped(void *cpuPtr)
{
    if ( CUDA_FAILED( cudaFreeHost(cpuPtr) ) )
        return false;

    std::cout << "cudaFreeMapped: OK" << std::endl;
    return true;
}

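// Convert a Caffe model to a serialized GIE (TensorRT 1.x) engine: parse the
// .prototxt/.caffemodel with the nvcaffeparser, optionally load the mean image
// from a .binaryproto, mark the requested blobs as network outputs, build the
// CUDA engine (FP16 if the platform supports it) and serialize it to the
// given stream.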
bool GIEFeatExtractor::caffeToGIEModel( const std::string& deployFile,           // name for .prototxt
                                        const std::string& modelFile,            // name for .caffemodel
                                        const std::string& binaryprotoFile,      // name for .binaryproto
                                        const std::vector<std::string>& outputs, // network outputs
                                        unsigned int maxBatchSize,               // batch size (NB: must be at least as large as the batch we want to run with)
                                        std::ostream& gieModelStream)            // output stream for the GIE model
{
    // create API root class - must span the lifetime of the engine usage
    nvinfer1::IBuilder* builder = createInferBuilder(gLogger);
    nvinfer1::INetworkDefinition* network = builder->createNetwork();

    builder->setMinFindIterations(3); // allow time for TX1 GPU to spin up
    builder->setAverageFindIterations(2);

    // parse the caffe model to populate the network, then set the outputs
    nvcaffeparser1::ICaffeParser* parser = nvcaffeparser1::createCaffeParser();

    const bool useFp16 = builder->platformHasFastFp16(); //getHalf2Mode();
    std::cout << "Platform FP16 support: " << useFp16 << std::endl;
    std::cout << "Loading: " << deployFile << ", " << modelFile << std::endl;

    nvinfer1::DataType modelDataType = useFp16 ? nvinfer1::DataType::kHALF : nvinfer1::DataType::kFLOAT; // create a 16-bit model if it's natively supported
    const nvcaffeparser1::IBlobNameToTensor *blobNameToTensor = parser->parse(deployFile.c_str(),  // caffe deploy file
                                                                              modelFile.c_str(),   // caffe model file
                                                                              *network,            // network definition that the parser will populate
                                                                              modelDataType);

    if( !blobNameToTensor )
    {
        std::cout << "Failed to parse caffe network." << std::endl;
        return false;
    }

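    // If a .binaryproto mean file is provided, its mean image is copied into
    // meanData and later subtracted pixel-by-pixel in extract_singleFeat_1D();
    // otherwise the per-channel mean values passed to the constructor are used.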
    if (binaryprotoFile!="")
    {
        // Parse the mean image if it is needed
        nvcaffeparser1::IBinaryProtoBlob* meanBlob = parser->parseBinaryProto(binaryprotoFile.c_str());
        resizeDims = meanBlob->getDimensions();

        const float *meanDataConst = reinterpret_cast<const float*>(meanBlob->getData()); // expected to be float* (c,h,w)

        meanData = (float *) malloc(resizeDims.w*resizeDims.h*resizeDims.c*resizeDims.n*sizeof(float));
        memcpy(meanData, meanDataConst, resizeDims.w*resizeDims.h*resizeDims.c*resizeDims.n*sizeof(float) );

        //cv::Mat tmpMat(resizeDims.h, resizeDims.w, CV_8UC3, meanDataChangeable);
        //cv::cvtColor(tmpMat, tmpMat, CV_RGB2BGR);
        //std::cout << "converted" << std::endl;
        //tmpMat.copyTo(meanMat);

        meanBlob->destroy();
        //free(meanDataChangeable);
    }

    // the caffe file has no notion of outputs, so we need to manually say which tensors the engine should generate
    const size_t num_outputs = outputs.size();

    for( size_t n=0; n < num_outputs; n++ )
        network->markOutput(*blobNameToTensor->find(outputs[n].c_str()));

    // Build the engine
    std::cout << "Configuring CUDA engine..." << std::endl;

    builder->setMaxBatchSize(maxBatchSize);
    builder->setMaxWorkspaceSize(16 << 20); // 16 MB

    // set up the network for paired-fp16 format, only on DriveCX
    if (useFp16)
        builder->setHalf2Mode(true);

    std::cout << "Building CUDA engine..." << std::endl;
    nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);

    if( !engine )
    {
        std::cout << "Failed to build CUDA engine." << std::endl;
        return false;
    }

    network->destroy();
    parser->destroy();

    // serialize the engine, then close everything down
    engine->serialize(gieModelStream);

    engine->destroy();
    builder->destroy();

    return true;
}

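// Constructor: zero-initializes all members, then builds and loads the engine
// via init(); the timing flag is assigned afterwards from the caller's argument.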
GIEFeatExtractor::GIEFeatExtractor(string _caffemodel_file,
        string _binaryproto_meanfile, float _meanR, float _meanG, float _meanB,
        string _prototxt_file, int _resizeWidth, int _resizeHeight,
        string _blob_name,
        bool _timing ) {

    mEngine = NULL;
    mInfer = NULL;
    mContext = NULL;

    resizeDims.n = -1;
    resizeDims.c = -1;
    resizeDims.w = -1;
    resizeDims.h = -1;

    mWidth = 0;
    mHeight = 0;
    mInputSize = 0;

    mInputCPU = NULL;
    mInputCUDA = NULL;

    mOutputSize = 0;
    mOutputDims = 0;

    mOutputCPU = NULL;
    mOutputCUDA = NULL;

    prototxt_file = "";
    caffemodel_file = "";
    blob_name = "";
    binaryproto_meanfile = "";

    timing = false;

    if( !init(_caffemodel_file, _binaryproto_meanfile, _meanR, _meanG, _meanB, _prototxt_file, _resizeWidth, _resizeHeight, _blob_name ) )
    {
        std::cout << "GIEFeatExtractor: init() failed." << std::endl;
    }

    // Initialize timing flag
    timing = _timing;
}

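// init(): checks that the device supports host-mapped (zero-copy) memory,
// converts the Caffe model to a serialized GIE engine, deserializes it into a
// runtime/engine/execution context, and allocates mapped input/output buffers
// sized from the "data" binding and the requested output binding.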
bool GIEFeatExtractor::init(string _caffemodel_file, string _binaryproto_meanfile, float _meanR, float _meanG, float _meanB, string _prototxt_file, int _resizeWidth, int _resizeHeight, string _blob_name)
{
    cudaDeviceProp prop;
    int whichDevice;

    if ( CUDA_FAILED( cudaGetDevice(&whichDevice)) )
        return false;

    if ( CUDA_FAILED( cudaGetDeviceProperties(&prop, whichDevice)) )
        return false;

    if (prop.canMapHostMemory != 1)
    {
        std::cout << "Device cannot map memory!" << std::endl;
        return false;
    }

    //if ( CUDA_FAILED( cudaSetDeviceFlags(cudaDeviceMapHost)) )
    //    return false;

    // Assign specified .caffemodel, .binaryproto, .prototxt files
    caffemodel_file = _caffemodel_file;
    binaryproto_meanfile = _binaryproto_meanfile;

    mean_values.push_back(_meanB);
    mean_values.push_back(_meanG);
    mean_values.push_back(_meanR);

    prototxt_file = _prototxt_file;

    // Assign blob to be extracted
    blob_name = _blob_name;

    // Load and convert model
    std::stringstream gieModelStream;
    gieModelStream.seekg(0, gieModelStream.beg);

    if( !caffeToGIEModel( prototxt_file, caffemodel_file, binaryproto_meanfile, std::vector< std::string > { blob_name }, 1, gieModelStream) )
    {
        std::cout << "Failed to load: " << caffemodel_file << std::endl;
        return false;
    }

    std::cout << caffemodel_file << ": loaded." << std::endl;

    // Create runtime inference engine execution context
    nvinfer1::IRuntime* infer = createInferRuntime(gLogger);
    if( !infer )
    {
        std::cout << "Failed to create InferRuntime." << std::endl;
        return false;
    }

    nvinfer1::ICudaEngine* engine = infer->deserializeCudaEngine(gieModelStream);
    if( !engine )
    {
        std::cout << "Failed to create CUDA engine." << std::endl;
        return false;
    }

    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    if( !context )
    {
        std::cout << "Failed to create execution context." << std::endl;
        return false;
    }

    std::cout << "CUDA engine context initialized with " << engine->getNbBindings() << " bindings." << std::endl;

    mInfer = infer;
    mEngine = engine;
    mContext = context;

    // Determine dimensions of network bindings
    const int inputIndex = engine->getBindingIndex("data");
    const int outputIndex = engine->getBindingIndex( blob_name.c_str() );

    std::cout << caffemodel_file << " input binding index: " << inputIndex << std::endl;
    std::cout << caffemodel_file << " output binding index: " << outputIndex << std::endl;

    nvinfer1::Dims3 inputDims = engine->getBindingDimensions(inputIndex);
    nvinfer1::Dims3 outputDims = engine->getBindingDimensions(outputIndex);

    size_t inputSize = inputDims.c * inputDims.h * inputDims.w * sizeof(float);
    size_t outputSize = outputDims.c * outputDims.h * outputDims.w * sizeof(float);

    std::cout << caffemodel_file << " input dims (c=" << inputDims.c << " h=" << inputDims.h << " w=" << inputDims.w << ") size=" << inputSize << std::endl;
    std::cout << caffemodel_file << " output dims (c=" << outputDims.c << " h=" << outputDims.h << " w=" << outputDims.w << ") size=" << outputSize << std::endl;

    // Allocate memory to hold the input image
    if ( !cudaAllocMapped((void**)&mInputCPU, (void**)&mInputCUDA, inputSize) )
    {
        std::cout << "Failed to alloc CUDA mapped memory for input, " << inputSize << " bytes" << std::endl;
        return false;
    }

    mInputSize = inputSize;
    mWidth = inputDims.w;
    mHeight = inputDims.h;

    // Allocate output memory to hold the result
    if( !cudaAllocMapped((void**)&mOutputCPU, (void**)&mOutputCUDA, outputSize) )
    {
        std::cout << "Failed to alloc CUDA mapped memory for output, " << outputSize << " bytes" << std::endl;
        return false;
    }

    mOutputSize = outputSize;
    mOutputDims = outputDims.c;

    std::cout << caffemodel_file << ": initialized." << std::endl;

    if (binaryproto_meanfile=="")
    {
        // No mean image: take the resize dimensions from the constructor
        // arguments (the mean pixel will be subtracted instead)
        resizeDims.h = _resizeHeight;
        resizeDims.w = _resizeWidth;
        resizeDims.c = 3;
        resizeDims.n = 1;
    }

    return true;
}

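// Destructor: releases the engine and runtime, frees the mapped input/output
// buffers, and frees the mean image buffer. The mean_values[0] == -1 test
// assumes the caller passes -1 for the per-channel means whenever a
// .binaryproto mean image is used (i.e. whenever meanData was allocated).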
GIEFeatExtractor::~GIEFeatExtractor()
{
    if( mEngine != NULL )
    {
        mEngine->destroy();
        mEngine = NULL;
    }

    if( mInfer != NULL )
    {
        mInfer->destroy();
        mInfer = NULL;
    }

    cudaFreeMapped(mOutputCPU);
    cudaFreeMapped(mInputCPU);

    if (mean_values[0]==-1)
        free(meanData);
}

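// Extract a 1-D feature vector from a single image: resize to the mean-image
// (or configured) dimensions, central-crop to the network input size, subtract
// the mean while copying the interleaved (HWC) image into the planar (CHW)
// float input buffer, run a synchronous batch-1 inference, and append the
// output binding to 'features'. When timing is enabled, times[0] holds the
// preprocessing time and times[1] the overall time, both in milliseconds.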
bool GIEFeatExtractor::extract_singleFeat_1D(cv::Mat &imMat, vector<float> &features, float (&times)[2])
{
    times[0] = 0.0f;
    times[1] = 0.0f;

    // Check input image
    if (imMat.empty())
    {
        std::cout << "GIEFeatExtractor::extract_singleFeat_1D(): empty imMat!" << std::endl;
        return false;
    }

    // Start timing
    cudaEvent_t startPrep, stopPrep, startNet, stopNet;
    if (timing)
    {
        cudaEventCreate(&startPrep);
        cudaEventCreate(&startNet);
        cudaEventCreate(&stopPrep);
        cudaEventCreate(&stopNet);
        cudaEventRecord(startPrep, NULL);
        cudaEventRecord(startNet, NULL);
    }

    // Image preprocessing

    // Resize (to 256x256 or to the size of the mean image).
    // NB: cv::Size takes (width, height).
    if (imMat.rows != resizeDims.h || imMat.cols != resizeDims.w)
    {
        if (imMat.rows > resizeDims.h || imMat.cols > resizeDims.w)
        {
            cv::resize(imMat, imMat, cv::Size(resizeDims.w, resizeDims.h), 0, 0, cv::INTER_LANCZOS4);
        }
        else
        {
            cv::resize(imMat, imMat, cv::Size(resizeDims.w, resizeDims.h), 0, 0, cv::INTER_LINEAR);
        }
    }

    // crop and subtract the mean image or the mean pixel
    int h_off = (imMat.rows - mHeight) / 2;
    int w_off = (imMat.cols - mWidth) / 2;

    cv::Mat cv_cropped_img = imMat;
    cv::Rect roi(w_off, h_off, mWidth, mHeight);
    cv_cropped_img = imMat(roi);

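    // Copy the cropped, interleaved (HWC) image into the zero-copy input buffer
    // in planar CHW order, subtracting either the corresponding mean-image pixel
    // (when a .binaryproto mean was loaded) or the per-channel mean value.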
    int top_index;
    for (int h = 0; h < mHeight; ++h)
    {
        const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
        int img_index = 0;
        for (int w = 0; w < mWidth; ++w)
        {
            for (int c = 0; c < imMat.channels(); ++c)
            {
                top_index = (c * mHeight + h) * mWidth + w;
                float pixel = static_cast<float>(ptr[img_index++]);
                if (mean_values[0]==-1)
                {
                    int mean_index = (c * imMat.rows + h_off + h) * imMat.cols + w_off + w;
                    mInputCPU[top_index] = pixel - meanData[mean_index];
                }
                else
                {
                    mInputCPU[top_index] = pixel - mean_values[c];
                }
            }
        }
    }

    /*
    // subtract mean
    if (meanR==-1)
    {
        if (!meanMat.empty() && imMat.rows==meanMat.rows && imMat.cols==meanMat.cols && imMat.channels()==meanMat.channels() && imMat.type()==meanMat.type())
        {
            imMat = imMat - meanMat;
        }
        else
        {
            std::cout << "GIEFeatExtractor::extract_singleFeat_1D(): cannot subtract mean image!" << std::endl;
            return false;
        }
    }
    else
    {
        imMat = imMat - cv::Scalar(meanB, meanG, meanR);
    }

    // crop to input dimension (central crop)
    if (imMat.cols>=mWidth && imMat.rows>=mHeight)
    {
        cv::Rect imROI(floor((imMat.cols-mWidth)*0.5f), floor((imMat.rows-mHeight)*0.5f), mWidth, mHeight);
        imMat(imROI).copyTo(imMat);
    }
    else
    {
        cv::resize(imMat, imMat, cv::Size(mHeight, mWidth), 0, 0, cv::INTER_LINEAR);
    }

    // convert to float (with range 0-255)
    imMat.convertTo(imMat, CV_32FC3);

    if ( !imMat.isContinuous() )
        imMat = imMat.clone();
    */

    // copy
    //CUDA( cudaMemcpy(mInputCPU, imMat.data, mInputSize, cudaMemcpyDefault) );
    //memcpy(mInputCPU, imMat.data, mInputSize);

    void* inferenceBuffers[] = { mInputCUDA, mOutputCUDA };

    if (timing)
    {
        // Record the preprocessing stop event and wait for it to complete
        cudaEventRecord(stopPrep, NULL);
        cudaEventSynchronize(stopPrep);

        cudaEventElapsedTime(times, startPrep, stopPrep);
    }

    mContext->execute(1, inferenceBuffers);
    //CUDA(cudaDeviceSynchronize());

    features.insert(features.end(), &mOutputCPU[0], &mOutputCPU[mOutputDims]);

    if (timing)
    {
        // Record the stop event and wait for it to complete
        cudaEventRecord(stopNet, NULL);
        cudaEventSynchronize(stopNet);

        // NB: startNet was recorded before preprocessing, so times[1] measures
        // the whole preprocessing + inference time, not the network alone
        cudaEventElapsedTime(times+1, startNet, stopNet);

        // Release the timing events (they are re-created on every call)
        cudaEventDestroy(startPrep);
        cudaEventDestroy(stopPrep);
        cudaEventDestroy(startNet);
        cudaEventDestroy(stopNet);
    }

    return true;
}