stereo-vision
All Data Structures Namespaces Functions Modules Pages
elas_omp.cpp
1 /*
2 Copyright 2011. All rights reserved.
3 Institute of Measurement and Control Systems
4 Karlsruhe Institute of Technology, Germany
5 
6 This file is part of libelas.
7 Authors: Andreas Geiger
8 
9 libelas is free software; you can redistribute it and/or modify it under the
10 terms of the GNU General Public License as published by the Free Software
11 Foundation; either version 3 of the License, or any later version.
12 
13 libelas is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
15 PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License along with
18 libelas; if not, write to the Free Software Foundation, Inc., 51 Franklin
19 Street, Fifth Floor, Boston, MA 02110-1301, USA
20  */
21 
22 #include "elas.h"
23 
24 #include <algorithm>
25 #include <math.h>
26 #include <omp.h>
27 #include "descriptor.h"
28 #include "triangle.h"
29 #include "matrix.h"
30 
31 using namespace std;
32 
33 bool Elas::process (uint8_t* I1_,uint8_t* I2_,float* D1,float* D2,const int32_t* dims){
34 
35  // get width, height and bytes per line
36  width = dims[0];
37  height = dims[1];
38  bpl = width + 15-(width-1)%16;
39 
40  // copy images to byte aligned memory
41  I1 = (uint8_t*)_mm_malloc(bpl*height*sizeof(uint8_t),16);
42  I2 = (uint8_t*)_mm_malloc(bpl*height*sizeof(uint8_t),16);
43  memset (I1,0,bpl*height*sizeof(uint8_t));
44  memset (I2,0,bpl*height*sizeof(uint8_t));
45  if (bpl==dims[2]) {
46  memcpy(I1,I1_,bpl*height*sizeof(uint8_t));
47  memcpy(I2,I2_,bpl*height*sizeof(uint8_t));
48  } else {
49  for (int32_t v=0; v<height; v++) {
50  memcpy(I1+v*bpl,I1_+v*dims[2],width*sizeof(uint8_t));
51  memcpy(I2+v*bpl,I2_+v*dims[2],width*sizeof(uint8_t));
52  }
53  }
54 
55  // allocate memory for disparity grid
56  int32_t grid_width = (int32_t)ceil((float)width/(float)param.grid_size);
57  int32_t grid_height = (int32_t)ceil((float)height/(float)param.grid_size);
58  int32_t grid_dims[3] = {param.disp_max+2,grid_width,grid_height};
59  int32_t* disparity_grid_1 = (int32_t*)calloc((param.disp_max+2)*grid_height*grid_width,sizeof(int32_t));
60  int32_t* disparity_grid_2 = (int32_t*)calloc((param.disp_max+2)*grid_height*grid_width,sizeof(int32_t));
61 
62 #ifdef PROFILE
63  timer.start("Descriptor");
64 #endif
65  Descriptor desc1(I1,width,height,bpl,param.subsampling);
66  Descriptor desc2(I2,width,height,bpl,param.subsampling);
67 
68 #ifdef PROFILE
69  timer.start("Support Matches");
70 #endif
71  vector<support_pt> p_support = computeSupportMatches(desc1.I_desc,desc2.I_desc);
72 
73  bool success;
74 
75  if (p_support.size()>=3)
76  {
77 
78 #ifdef PROFILE
79  timer.start("Parallel Region #1 = {Delaunay Triangulation, Disparity Planes, Grid}");
80 #endif
81 
82  vector<triangle> tri_1, tri_2;
83 #pragma omp parallel num_threads(2)
84  {
85 #pragma omp sections
86  {
87 #pragma omp section
88  {
89  tri_1 = computeDelaunayTriangulation(p_support,0);
90  computeDisparityPlanes(p_support,tri_1,0);
91  createGrid(p_support,disparity_grid_1,grid_dims,0);
92  //computeDisparity(p_support,tri_1,disparity_grid_1,grid_dims,desc1.I_desc,desc2.I_desc,0,D1);
93  }
94 #pragma omp section
95  {
96  tri_2 = computeDelaunayTriangulation(p_support,1);
97  computeDisparityPlanes(p_support,tri_2,1);
98  createGrid(p_support,disparity_grid_2,grid_dims,1);
99  //computeDisparity(p_support,tri_2,disparity_grid_2,grid_dims,desc1.I_desc,desc2.I_desc,1,D2);
100 
101  }
102 
103  }
104  }
105 
106 #ifdef PROFILE
107  timer.start("Matching");
108 #endif
109 
110 #pragma omp sections
111  {
112 #pragma omp section
113  computeDisparity(p_support,tri_1,disparity_grid_1,grid_dims,desc1.I_desc,desc2.I_desc,0,D1);
114 #pragma omp section
115  computeDisparity(p_support,tri_2,disparity_grid_2,grid_dims,desc1.I_desc,desc2.I_desc,1,D2);
116  }
117 
118 #ifdef PROFILE
119  timer.start("L/R Consistency Check");
120 #endif
121  leftRightConsistencyCheck(D1,D2);
122 
123 #ifdef PROFILE
124  timer.start("Remove Small Segments");
125 #endif
126  removeSmallSegments(D1);
127  if (!param.postprocess_only_left)
128  removeSmallSegments(D2);
129 
130 #ifdef PROFILE
131  timer.start("Gap Interpolation");
132 #endif
133  gapInterpolation(D1);
134  if (!param.postprocess_only_left)
135  gapInterpolation(D2);
136 
137  if (param.filter_adaptive_mean) {
138 #ifdef PROFILE
139  timer.start("Adaptive Mean");
140 #endif
141  adaptiveMean(D1);
142  if (!param.postprocess_only_left)
143  adaptiveMean(D2);
144  }
145 
146  if (param.filter_median) {
147 #ifdef PROFILE
148  timer.start("Median");
149 #endif
150  median(D1);
151  if (!param.postprocess_only_left)
152  median(D2);
153  }
154 
155 #ifdef PROFILE
156  timer.plot();
157 #endif
158 
159  success = true;
160  } else
161  {
162  success = false;
163  }
164 
165  // release memory
166  free(disparity_grid_1);
167  free(disparity_grid_2);
168  _mm_free(I1);
169  _mm_free(I2);
170 
171  return success;
172 }
173 
174 void Elas::removeInconsistentSupportPoints (int16_t* D_can,int32_t D_can_width,int32_t D_can_height) {
175 
176  // for all valid support points do
177 #pragma omp for
178  for (int32_t u_can=0; u_can<D_can_width; u_can++) {
179  for (int32_t v_can=0; v_can<D_can_height; v_can++) {
180  int16_t d_can = *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width));
181  if (d_can>=0) {
182 
183  // compute number of other points supporting the current point
184  int32_t support = 0;
185  for (int32_t u_can_2=u_can-param.incon_window_size; u_can_2<=u_can+param.incon_window_size; u_can_2++) {
186  for (int32_t v_can_2=v_can-param.incon_window_size; v_can_2<=v_can+param.incon_window_size; v_can_2++) {
187  if (u_can_2>=0 && v_can_2>=0 && u_can_2<D_can_width && v_can_2<D_can_height) {
188  int16_t d_can_2 = *(D_can+getAddressOffsetImage(u_can_2,v_can_2,D_can_width));
189  if (d_can_2>=0 && abs(d_can-d_can_2)<=param.incon_threshold)
190  support++;
191  }
192  }
193  }
194 
195  // invalidate support point if number of supporting points is too low
196  if (support<param.incon_min_support)
197  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
198  }
199  }
200  }
201 }
202 
203 void Elas::removeRedundantSupportPoints(int16_t* D_can,int32_t D_can_width,int32_t D_can_height,
204  int32_t redun_max_dist, int32_t redun_threshold, bool vertical) {
205 
206  // parameters
207  int32_t redun_dir_u[2] = {0,0};
208  int32_t redun_dir_v[2] = {0,0};
209  if (vertical) {
210  redun_dir_v[0] = -1;
211  redun_dir_v[1] = +1;
212  } else {
213  redun_dir_u[0] = -1;
214  redun_dir_u[1] = +1;
215  }
216 
217  // for all valid support points do
218 #pragma omp for
219  for (int32_t u_can=0; u_can<D_can_width; u_can++) {
220  for (int32_t v_can=0; v_can<D_can_height; v_can++) {
221  int16_t d_can = *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width));
222  if (d_can>=0) {
223 
224  // check all directions for redundancy
225  bool redundant = true;
226  for (int32_t i=0; i<2; i++) {
227 
228  // search for support
229  int32_t u_can_2 = u_can;
230  int32_t v_can_2 = v_can;
231  int16_t d_can_2;
232  bool support = false;
233  for (int32_t j=0; j<redun_max_dist; j++) {
234  u_can_2 += redun_dir_u[i];
235  v_can_2 += redun_dir_v[i];
236  if (u_can_2<0 || v_can_2<0 || u_can_2>=D_can_width || v_can_2>=D_can_height)
237  break;
238  d_can_2 = *(D_can+getAddressOffsetImage(u_can_2,v_can_2,D_can_width));
239  if (d_can_2>=0 && abs(d_can-d_can_2)<=redun_threshold) {
240  support = true;
241  break;
242  }
243  }
244 
245  // if we have no support => point is not redundant
246  if (!support) {
247  redundant = false;
248  break;
249  }
250  }
251 
252  // invalidate support point if it is redundant
253  if (redundant)
254  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
255  }
256  }
257  }
258 }
259 
260 void Elas::addCornerSupportPoints(vector<support_pt> &p_support) {
261 
262  // list of border points
263  vector<support_pt> p_border;
264  p_border.push_back(support_pt(0,0,0));
265  p_border.push_back(support_pt(0,height-1,0));
266  p_border.push_back(support_pt(width-1,0,0));
267  p_border.push_back(support_pt(width-1,height-1,0));
268 
269  // find closest d
270  for (int32_t i=0; i<p_border.size(); i++) {
271  int32_t best_dist = 10000000;
272  for (int32_t j=0; j<p_support.size(); j++) {
273  int32_t du = p_border[i].u-p_support[j].u;
274  int32_t dv = p_border[i].v-p_support[j].v;
275  int32_t curr_dist = du*du+dv*dv;
276  if (curr_dist<best_dist) {
277  best_dist = curr_dist;
278  p_border[i].d = p_support[j].d;
279  }
280  }
281  }
282 
283  // for right image
284  p_border.push_back(support_pt(p_border[2].u+p_border[2].d,p_border[2].v,p_border[2].d));
285  p_border.push_back(support_pt(p_border[3].u+p_border[3].d,p_border[3].v,p_border[3].d));
286 
287  // add border points to support points
288  for (int32_t i=0; i<p_border.size(); i++)
289  p_support.push_back(p_border[i]);
290 }
291 
292 inline int16_t Elas::computeMatchingDisparity (const int32_t &u,const int32_t &v,uint8_t* I1_desc,uint8_t* I2_desc,const bool &right_image) {
293 
294  const int32_t u_step = 2;
295  const int32_t v_step = 2;
296  const int32_t window_size = 3;
297 
298  int32_t desc_offset_1 = -16*u_step-16*width*v_step;
299  int32_t desc_offset_2 = +16*u_step-16*width*v_step;
300  int32_t desc_offset_3 = -16*u_step+16*width*v_step;
301  int32_t desc_offset_4 = +16*u_step+16*width*v_step;
302 
303  __m128i xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
304 
305  // check if we are inside the image region
306  if (u>=window_size+u_step && u<=width-window_size-1-u_step && v>=window_size+v_step && v<=height-window_size-1-v_step) {
307 
308  // compute desc and start addresses
309  int32_t line_offset = 16*width*v;
310  uint8_t *I1_line_addr,*I2_line_addr;
311  if (!right_image) {
312  I1_line_addr = I1_desc+line_offset;
313  I2_line_addr = I2_desc+line_offset;
314  } else {
315  I1_line_addr = I2_desc+line_offset;
316  I2_line_addr = I1_desc+line_offset;
317  }
318 
319  // compute I1 block start addresses
320  uint8_t* I1_block_addr = I1_line_addr+16*u;
321  uint8_t* I2_block_addr;
322 
323  // we require at least some texture
324  int32_t sum = 0;
325  for (int32_t i=0; i<16; i++)
326  sum += abs((int32_t)(*(I1_block_addr+i))-128);
327  if (sum<param.support_texture)
328  return -1;
329 
330  // load first blocks to xmm registers
331  xmm1 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_1));
332  xmm2 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_2));
333  xmm3 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_3));
334  xmm4 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_4));
335 
336  // declare match energy for each disparity
337  int32_t u_warp;
338 
339  // best match
340  int16_t min_1_E = 32767;
341  int16_t min_1_d = -1;
342  int16_t min_2_E = 32767;
343  int16_t min_2_d = -1;
344 
345  // get valid disparity range
346  int32_t disp_min_valid = max(param.disp_min,0);
347  int32_t disp_max_valid = param.disp_max;
348  if (!right_image) disp_max_valid = min(param.disp_max,u-window_size-u_step);
349  else disp_max_valid = min(param.disp_max,width-u-window_size-u_step);
350 
351  // assume, that we can compute at least 10 disparities for this pixel
352  if (disp_max_valid-disp_min_valid<10)
353  return -1;
354 
355  // for all disparities do
356  for (int16_t d=disp_min_valid; d<=disp_max_valid; d++) {
357 
358  // warp u coordinate
359  if (!right_image) u_warp = u-d;
360  else u_warp = u+d;
361 
362  // compute I2 block start addresses
363  I2_block_addr = I2_line_addr+16*u_warp;
364 
365  // compute match energy at this disparity
366  xmm6 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_1));
367  xmm6 = _mm_sad_epu8(xmm1,xmm6);
368  xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_2));
369  xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm2,xmm5),xmm6);
370  xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_3));
371  xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm3,xmm5),xmm6);
372  xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_4));
373  xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm4,xmm5),xmm6);
374  sum = _mm_extract_epi16(xmm6,0)+_mm_extract_epi16(xmm6,4);
375 
376  // best + second best match
377  if (sum<min_1_E) {
378  min_1_E = sum;
379  min_1_d = d;
380  } else if (sum<min_2_E) {
381  min_2_E = sum;
382  min_2_d = d;
383  }
384  }
385 
386  // check if best and second best match are available and if matching ratio is sufficient
387  if (min_1_d>=0 && min_2_d>=0 && (float)min_1_E<param.support_threshold*(float)min_2_E)
388  return min_1_d;
389  else
390  return -1;
391 
392  } else
393  return -1;
394 }
395 
396 vector<Elas::support_pt> Elas::computeSupportMatches (uint8_t* I1_desc,uint8_t* I2_desc) {
397 
398  // be sure that at half resolution we only need data
399  // from every second line!
400  int32_t D_candidate_stepsize = param.candidate_stepsize;
401  if (param.subsampling)
402  D_candidate_stepsize += D_candidate_stepsize%2;
403 
404  // create matrix for saving disparity candidates
405  int32_t D_can_width = 0;
406  int32_t D_can_height = 0;
407  for (int32_t u=0; u<width; u+=D_candidate_stepsize) D_can_width++;
408  for (int32_t v=0; v<height; v+=D_candidate_stepsize) D_can_height++;
409  int16_t* D_can = (int16_t*)calloc(D_can_width*D_can_height,sizeof(int16_t));
410 
411  // loop variables
412  int32_t u,v;
413  int16_t d,d2;
414  int32_t u_can, v_can;
415  int32_t lr_threshold = param.lr_threshold;
416  vector<support_pt> p_support;
417  vector<support_pt> partial_p_support[2];
418  // for all point candidates in image 1 do
419 #pragma omp parallel default(none) num_threads(2) private(u_can, v_can, u, d, v, d2) shared(partial_p_support,lr_threshold, D_can, D_can_width, D_can_height, D_candidate_stepsize, I1_desc, I2_desc)
420  {
421  int tid = omp_get_thread_num();
422 #pragma omp for
423  for (v_can=1; v_can<D_can_height; v_can++) {
424  v = v_can*D_candidate_stepsize;
425  for (u_can=1; u_can<D_can_width; u_can++) {
426  u = u_can*D_candidate_stepsize;
427 
428  // initialize disparity candidate to invalid
429  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
430 
431  // find forwards
432  d = computeMatchingDisparity(u,v,I1_desc,I2_desc,false);
433  if (d>=0) {
434 
435  // find backwards
436  d2 = computeMatchingDisparity(u-d,v,I1_desc,I2_desc,true);
437  if (d2>=0 && abs(d-d2)<=lr_threshold)
438  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = d;
439  }
440  }
441  }
442 
443 
444 
445  // remove inconsistent support points
446  //timer.start("removeInconsistentSupportPoints");
447  removeInconsistentSupportPoints(D_can,D_can_width,D_can_height);
448 
449  // remove support points on straight lines, since they are redundant
450  // this reduces the number of triangles a little bit and hence speeds up
451  // the triangulation process
452  //timer.start("removeRedundantSupportPoints");
453  removeRedundantSupportPoints(D_can,D_can_width,D_can_height,5,1,true);
454  removeRedundantSupportPoints(D_can,D_can_width,D_can_height,5,1,false);
455 
456  //}
457  // move support points from image representation into a vector representation
458  //timer.start("segundo for");
459 
460 
461 
462 #pragma omp for
463  for (int32_t v_can=1; v_can<D_can_height; v_can++)
464  for (int32_t u_can=1; u_can<D_can_width; u_can++)
465  if (*(D_can+getAddressOffsetImage(u_can,v_can,D_can_width))>=0)
466  partial_p_support[tid].push_back(support_pt(u_can*D_candidate_stepsize,
467  v_can*D_candidate_stepsize,
468  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width))));
469  }
470  p_support = partial_p_support[0];
471  p_support.insert(p_support.end(),partial_p_support[1].begin(),partial_p_support[1].end());
472 
473  // if flag is set, add support points in image corners
474  // with the same disparity as the nearest neighbor support point
475  if (param.add_corners)
476  addCornerSupportPoints(p_support);
477 
478 
479  // free memory
480  free(D_can);
481 
482  // return support point vector
483  return p_support;
484 }
485 
486 vector<Elas::triangle> Elas::computeDelaunayTriangulation (vector<support_pt> p_support,int32_t right_image) {
487 
488  // input/output structure for triangulation
489  struct triangulateio in, out;
490  int32_t k;
491 
492  // inputs
493  in.numberofpoints = p_support.size();
494  in.pointlist = (float*)malloc(in.numberofpoints*2*sizeof(float));
495  k=0;
496  if (!right_image) {
497  for (int32_t i=0; i<p_support.size(); i++) {
498  in.pointlist[k++] = p_support[i].u;
499  in.pointlist[k++] = p_support[i].v;
500  }
501  } else {
502  for (int32_t i=0; i<p_support.size(); i++) {
503  in.pointlist[k++] = p_support[i].u-p_support[i].d;
504  in.pointlist[k++] = p_support[i].v;
505  }
506  }
507  in.numberofpointattributes = 0;
508  in.pointattributelist = NULL;
509  in.pointmarkerlist = NULL;
510  in.numberofsegments = 0;
511  in.numberofholes = 0;
512  in.numberofregions = 0;
513  in.regionlist = NULL;
514 
515  // outputs
516  out.pointlist = NULL;
517  out.pointattributelist = NULL;
518  out.pointmarkerlist = NULL;
519  out.trianglelist = NULL;
520  out.triangleattributelist = NULL;
521  out.neighborlist = NULL;
522  out.segmentlist = NULL;
523  out.segmentmarkerlist = NULL;
524  out.edgelist = NULL;
525  out.edgemarkerlist = NULL;
526 
527  // do triangulation (z=zero-based, n=neighbors, Q=quiet, B=no boundary markers)
528  char parameters[] = "zQB";
529  triangulate(parameters, &in, &out, NULL);
530 
531  // put resulting triangles into vector tri
532  vector<triangle> tri;
533  k=0;
534  for (int32_t i=0; i<out.numberoftriangles; i++) {
535  tri.push_back(triangle(out.trianglelist[k],out.trianglelist[k+1],out.trianglelist[k+2]));
536  k+=3;
537  }
538 
539  // free memory used for triangulation
540  free(in.pointlist);
541  free(out.pointlist);
542  free(out.trianglelist);
543 
544  // return triangles
545  return tri;
546 }
547 
548 void Elas::computeDisparityPlanes (vector<support_pt> p_support,vector<triangle> &tri,int32_t right_image) {
549 
550  // init matrices
551  Matrix A(3,3);
552  Matrix b(3,1);
553 
554  // for all triangles do
555  for (int32_t i=0; i<tri.size(); i++) {
556 
557  // get triangle corner indices
558  int32_t c1 = tri[i].c1;
559  int32_t c2 = tri[i].c2;
560  int32_t c3 = tri[i].c3;
561 
562  // compute matrix A for linear system of left triangle
563  A.val[0][0] = p_support[c1].u;
564  A.val[1][0] = p_support[c2].u;
565  A.val[2][0] = p_support[c3].u;
566  A.val[0][1] = p_support[c1].v; A.val[0][2] = 1;
567  A.val[1][1] = p_support[c2].v; A.val[1][2] = 1;
568  A.val[2][1] = p_support[c3].v; A.val[2][2] = 1;
569 
570  // compute vector b for linear system (containing the disparities)
571  b.val[0][0] = p_support[c1].d;
572  b.val[1][0] = p_support[c2].d;
573  b.val[2][0] = p_support[c3].d;
574 
575  // on success of gauss jordan elimination
576  if (b.solve(A)) {
577 
578  // grab results from b
579  tri[i].t1a = b.val[0][0];
580  tri[i].t1b = b.val[1][0];
581  tri[i].t1c = b.val[2][0];
582 
583  // otherwise: invalid
584  } else {
585  tri[i].t1a = 0;
586  tri[i].t1b = 0;
587  tri[i].t1c = 0;
588  }
589 
590  // compute matrix A for linear system of right triangle
591  A.val[0][0] = p_support[c1].u-p_support[c1].d;
592  A.val[1][0] = p_support[c2].u-p_support[c2].d;
593  A.val[2][0] = p_support[c3].u-p_support[c3].d;
594  A.val[0][1] = p_support[c1].v; A.val[0][2] = 1;
595  A.val[1][1] = p_support[c2].v; A.val[1][2] = 1;
596  A.val[2][1] = p_support[c3].v; A.val[2][2] = 1;
597 
598  // compute vector b for linear system (containing the disparities)
599  b.val[0][0] = p_support[c1].d;
600  b.val[1][0] = p_support[c2].d;
601  b.val[2][0] = p_support[c3].d;
602 
603  // on success of gauss jordan elimination
604  if (b.solve(A)) {
605 
606  // grab results from b
607  tri[i].t2a = b.val[0][0];
608  tri[i].t2b = b.val[1][0];
609  tri[i].t2c = b.val[2][0];
610 
611  // otherwise: invalid
612  } else {
613  tri[i].t2a = 0;
614  tri[i].t2b = 0;
615  tri[i].t2c = 0;
616  }
617  }
618 }
619 
620 void Elas::createGrid(vector<support_pt> p_support,int32_t* disparity_grid,int32_t* grid_dims,bool right_image) {
621 
622  // get grid dimensions
623  int32_t grid_width = grid_dims[1];
624  int32_t grid_height = grid_dims[2];
625 
626  // allocate temporary memory
627  int32_t* temp1 = (int32_t*)calloc((param.disp_max+1)*grid_height*grid_width,sizeof(int32_t));
628  int32_t* temp2 = (int32_t*)calloc((param.disp_max+1)*grid_height*grid_width,sizeof(int32_t));
629 
630  // for all support points do
631  for (int32_t i=0; i<p_support.size(); i++) {
632 
633  // compute disparity range to fill for this support point
634  int32_t x_curr = p_support[i].u;
635  int32_t y_curr = p_support[i].v;
636  int32_t d_curr = p_support[i].d;
637  int32_t d_min = max(d_curr-1,0);
638  int32_t d_max = min(d_curr+1,param.disp_max);
639 
640  // fill disparity grid helper
641  for (int32_t d=d_min; d<=d_max; d++) {
642  int32_t x;
643  if (!right_image)
644  x = floor((float)(x_curr/param.grid_size));
645  else
646  x = floor((float)(x_curr-d_curr)/(float)param.grid_size);
647  int32_t y = floor((float)y_curr/(float)param.grid_size);
648 
649  // point may potentially lay outside (corner points)
650  if (x>=0 && x<grid_width &&y>=0 && y<grid_height) {
651  int32_t addr = getAddressOffsetGrid(x,y,d,grid_width,param.disp_max+1);
652  *(temp1+addr) = 1;
653  }
654  }
655  }
656 
657  // diffusion pointers
658  const int32_t* tl = temp1 + (0*grid_width+0)*(param.disp_max+1);
659  const int32_t* tc = temp1 + (0*grid_width+1)*(param.disp_max+1);
660  const int32_t* tr = temp1 + (0*grid_width+2)*(param.disp_max+1);
661  const int32_t* cl = temp1 + (1*grid_width+0)*(param.disp_max+1);
662  const int32_t* cc = temp1 + (1*grid_width+1)*(param.disp_max+1);
663  const int32_t* cr = temp1 + (1*grid_width+2)*(param.disp_max+1);
664  const int32_t* bl = temp1 + (2*grid_width+0)*(param.disp_max+1);
665  const int32_t* bc = temp1 + (2*grid_width+1)*(param.disp_max+1);
666  const int32_t* br = temp1 + (2*grid_width+2)*(param.disp_max+1);
667 
668  int32_t* result = temp2 + (1*grid_width+1)*(param.disp_max+1);
669  int32_t* end_input = temp1 + grid_width*grid_height*(param.disp_max+1);
670 
671  // diffuse temporary grid
672  for( ; br != end_input; tl++, tc++, tr++, cl++, cc++, cr++, bl++, bc++, br++, result++ )
673  *result = *tl | *tc | *tr | *cl | *cc | *cr | *bl | *bc | *br;
674 
675  // for all grid positions create disparity grid
676  for (int32_t x=0; x<grid_width; x++) {
677  for (int32_t y=0; y<grid_height; y++) {
678 
679  // start with second value (first is reserved for count)
680  int32_t curr_ind = 1;
681 
682  // for all disparities do
683  for (int32_t d=0; d<=param.disp_max; d++) {
684 
685  // if yes => add this disparity to current cell
686  if (*(temp2+getAddressOffsetGrid(x,y,d,grid_width,param.disp_max+1))>0) {
687  *(disparity_grid+getAddressOffsetGrid(x,y,curr_ind,grid_width,param.disp_max+2))=d;
688  curr_ind++;
689  }
690  }
691 
692  // finally set number of indices
693  *(disparity_grid+getAddressOffsetGrid(x,y,0,grid_width,param.disp_max+2))=curr_ind-1;
694  }
695  }
696 
697  // release temporary memory
698  free(temp1);
699  free(temp2);
700 }
701 
702 inline void Elas::updatePosteriorMinimum(__m128i* I2_block_addr,const int32_t &d,const int32_t &w,
703  const __m128i &xmm1,__m128i &xmm2,int32_t &val,int32_t &min_val,int32_t &min_d) {
704  xmm2 = _mm_load_si128(I2_block_addr);
705  xmm2 = _mm_sad_epu8(xmm1,xmm2);
706  val = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4)+w;
707  if (val<min_val) {
708  min_val = val;
709  min_d = d;
710  }
711 }
712 
713 inline void Elas::updatePosteriorMinimum(__m128i* I2_block_addr,const int32_t &d,
714  const __m128i &xmm1,__m128i &xmm2,int32_t &val,int32_t &min_val,int32_t &min_d) {
715  xmm2 = _mm_load_si128(I2_block_addr);
716  xmm2 = _mm_sad_epu8(xmm1,xmm2);
717  val = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4);
718  if (val<min_val) {
719  min_val = val;
720  min_d = d;
721  }
722 }
723 
724 inline void Elas::findMatch(int32_t &u,int32_t &v,float &plane_a,float &plane_b,float &plane_c,
725  int32_t* disparity_grid,int32_t *grid_dims,uint8_t* I1_desc,uint8_t* I2_desc,
726  int32_t *P,int32_t &plane_radius,bool &valid,bool &right_image,float* D){
727 
728  // get image width and height
729  const int32_t disp_num = grid_dims[0]-1;
730  const int32_t window_size = 2;
731 
732  // address of disparity we want to compute
733  uint32_t d_addr;
734  if (param.subsampling) d_addr = getAddressOffsetImage(u/2,v/2,width/2);
735  else d_addr = getAddressOffsetImage(u,v,width);
736 
737  // check if u is ok
738  if (u<window_size || u>=width-window_size)
739  return;
740 
741  // compute line start address
742  int32_t line_offset = 16*width*max(min(v,height-3),2);
743  uint8_t *I1_line_addr,*I2_line_addr;
744  if (!right_image) {
745  I1_line_addr = I1_desc+line_offset;
746  I2_line_addr = I2_desc+line_offset;
747  } else {
748  I1_line_addr = I2_desc+line_offset;
749  I2_line_addr = I1_desc+line_offset;
750  }
751 
752  // compute I1 block start address
753  uint8_t* I1_block_addr = I1_line_addr+16*u;
754 
755  // does this patch have enough texture?
756  int32_t sum = 0;
757  for (int32_t i=0; i<16; i++)
758  sum += abs((int32_t)(*(I1_block_addr+i))-128);
759  if (sum<param.match_texture)
760  return;
761 
762  // compute disparity, min disparity and max disparity of plane prior
763  int32_t d_plane = (int32_t)(plane_a*(float)u+plane_b*(float)v+plane_c);
764  int32_t d_plane_min = max(d_plane-plane_radius,0);
765  int32_t d_plane_max = min(d_plane+plane_radius,disp_num-1);
766 
767  // get grid pointer
768  int32_t grid_x = (int32_t)floor((float)u/(float)param.grid_size);
769  int32_t grid_y = (int32_t)floor((float)v/(float)param.grid_size);
770  uint32_t grid_addr = getAddressOffsetGrid(grid_x,grid_y,0,grid_dims[1],grid_dims[0]);
771  int32_t num_grid = *(disparity_grid+grid_addr);
772  int32_t* d_grid = disparity_grid+grid_addr+1;
773 
774  // loop variables
775  int32_t d_curr, u_warp, val;
776  int32_t min_val = 10000;
777  int32_t min_d = -1;
778  __m128i xmm1 = _mm_load_si128((__m128i*)I1_block_addr);
779  __m128i xmm2;
780 
781  // left image
782  if (!right_image) {
783  for (int32_t i=0; i<num_grid; i++) {
784  d_curr = d_grid[i];
785  if (d_curr<d_plane_min || d_curr>d_plane_max) {
786  u_warp = u-d_curr;
787  if (u_warp<window_size || u_warp>=width-window_size)
788  continue;
789  updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,xmm1,xmm2,val,min_val,min_d);
790  }
791  }
792  for (d_curr=d_plane_min; d_curr<=d_plane_max; d_curr++) {
793  u_warp = u-d_curr;
794  if (u_warp<window_size || u_warp>=width-window_size)
795  continue;
796  updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,valid?*(P+abs(d_curr-d_plane)):0,xmm1,xmm2,val,min_val,min_d);
797  }
798 
799  // right image
800  } else {
801  for (int32_t i=0; i<num_grid; i++) {
802  d_curr = d_grid[i];
803  if (d_curr<d_plane_min || d_curr>d_plane_max) {
804  u_warp = u+d_curr;
805  if (u_warp<window_size || u_warp>=width-window_size)
806  continue;
807  updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,xmm1,xmm2,val,min_val,min_d);
808  }
809  }
810  for (d_curr=d_plane_min; d_curr<=d_plane_max; d_curr++) {
811  u_warp = u+d_curr;
812  if (u_warp<window_size || u_warp>=width-window_size)
813  continue;
814  updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,valid?*(P+abs(d_curr-d_plane)):0,xmm1,xmm2,val,min_val,min_d);
815  }
816  }
817 
818  // set disparity value
819  if (min_d>=0) *(D+d_addr) = min_d; // MAP value (min neg-Log probability)
820  else *(D+d_addr) = -1; // invalid disparity
821 }
822 
823 // TODO: %2 => more elegantly
824 void Elas::computeDisparity(vector<support_pt> p_support,vector<triangle> tri,int32_t* disparity_grid,int32_t *grid_dims,
825  uint8_t* I1_desc,uint8_t* I2_desc,bool right_image,float* D) {
826 
827  // number of disparities
828  //const int32_t disp_num = grid_dims[0]-1;
829  int disp_num = grid_dims[0]-1;
830 
831  // descriptor window_size
832  int32_t window_size = 2;
833 
834  // init disparity image to -10
835  if (param.subsampling) {
836  for (int32_t i=0; i<(width/2)*(height/2); i++)
837  *(D+i) = -10;
838  } else {
839  for (int32_t i=0; i<width*height; i++)
840  *(D+i) = -10;
841  }
842 
843  // pre-compute prior
844  float two_sigma_squared = 2*param.sigma*param.sigma;
845  int32_t* P = new int32_t[disp_num];
846  for (int32_t delta_d=0; delta_d<disp_num; delta_d++)
847  P[delta_d] = (int32_t)((-log(param.gamma+exp(-delta_d*delta_d/two_sigma_squared))+log(param.gamma))/param.beta);
848  int32_t plane_radius = (int32_t)max((float)ceil(param.sigma*param.sradius),(float)2.0);
849 
850  // loop variables
851  int32_t c1, c2, c3;
852  float plane_a,plane_b,plane_c,plane_d;
853  uint32_t i;
854 
855  // for all triangles do
856 #pragma omp parallel for num_threads(3) default(none)\
857  private(i, plane_a, plane_b, plane_c, plane_d, c1, c2, c3)\
858  shared(P, plane_radius, two_sigma_squared, disp_num, window_size, p_support, tri, disparity_grid, grid_dims, I1_desc, I2_desc, right_image, D)
859  for (i=0; i<tri.size(); i++) {
860 
861  //printf("Matching thread %d\n", omp_get_thread_num());
862  // get plane parameters
863  uint32_t p_i = i*3;
864  if (!right_image) {
865  plane_a = tri[i].t1a;
866  plane_b = tri[i].t1b;
867  plane_c = tri[i].t1c;
868  plane_d = tri[i].t2a;
869  } else {
870  plane_a = tri[i].t2a;
871  plane_b = tri[i].t2b;
872  plane_c = tri[i].t2c;
873  plane_d = tri[i].t1a;
874  }
875 
876  // triangle corners
877  c1 = tri[i].c1;
878  c2 = tri[i].c2;
879  c3 = tri[i].c3;
880 
881  // sort triangle corners wrt. u (ascending)
882  float tri_u[3];
883  if (!right_image) {
884  tri_u[0] = p_support[c1].u;
885  tri_u[1] = p_support[c2].u;
886  tri_u[2] = p_support[c3].u;
887  } else {
888  tri_u[0] = p_support[c1].u-p_support[c1].d;
889  tri_u[1] = p_support[c2].u-p_support[c2].d;
890  tri_u[2] = p_support[c3].u-p_support[c3].d;
891  }
892  float tri_v[3] = {p_support[c1].v,p_support[c2].v,p_support[c3].v};
893 
894  for (uint32_t j=0; j<3; j++) {
895  for (uint32_t k=0; k<j; k++) {
896  if (tri_u[k]>tri_u[j]) {
897  float tri_u_temp = tri_u[j]; tri_u[j] = tri_u[k]; tri_u[k] = tri_u_temp;
898  float tri_v_temp = tri_v[j]; tri_v[j] = tri_v[k]; tri_v[k] = tri_v_temp;
899  }
900  }
901  }
902 
903  // rename corners
904  float A_u = tri_u[0]; float A_v = tri_v[0];
905  float B_u = tri_u[1]; float B_v = tri_v[1];
906  float C_u = tri_u[2]; float C_v = tri_v[2];
907 
908  // compute straight lines connecting triangle corners
909  float AB_a = 0; float AC_a = 0; float BC_a = 0;
910  if ((int32_t)(A_u)!=(int32_t)(B_u)) AB_a = (A_v-B_v)/(A_u-B_u);
911  if ((int32_t)(A_u)!=(int32_t)(C_u)) AC_a = (A_v-C_v)/(A_u-C_u);
912  if ((int32_t)(B_u)!=(int32_t)(C_u)) BC_a = (B_v-C_v)/(B_u-C_u);
913  float AB_b = A_v-AB_a*A_u;
914  float AC_b = A_v-AC_a*A_u;
915  float BC_b = B_v-BC_a*B_u;
916 
917  // a plane is only valid if itself and its projection
918  // into the other image is not too much slanted
919  bool valid = fabs(plane_a)<0.7 && fabs(plane_d)<0.7;
920 
921  // first part (triangle corner A->B)
922  if ((int32_t)(A_u)!=(int32_t)(B_u)) {
923  for (int32_t u=max((int32_t)A_u,0); u<min((int32_t)B_u,width); u++){
924  if (!param.subsampling || u%2==0) {
925  int32_t v_1 = (uint32_t)(AC_a*(float)u+AC_b);
926  int32_t v_2 = (uint32_t)(AB_a*(float)u+AB_b);
927  for (int32_t v=min(v_1,v_2); v<max(v_1,v_2); v++)
928  if (!param.subsampling || v%2==0) {
929  findMatch(u,v,plane_a,plane_b,plane_c,disparity_grid,grid_dims,
930  I1_desc,I2_desc,P,plane_radius,valid,right_image,D);
931  }
932  }
933  }
934  }
935 
936  // second part (triangle corner B->C)
937  if ((int32_t)(B_u)!=(int32_t)(C_u)) {
938  for (int32_t u=max((int32_t)B_u,0); u<min((int32_t)C_u,width); u++){
939  if (!param.subsampling || u%2==0) {
940  int32_t v_1 = (uint32_t)(AC_a*(float)u+AC_b);
941  int32_t v_2 = (uint32_t)(BC_a*(float)u+BC_b);
942  for (int32_t v=min(v_1,v_2); v<max(v_1,v_2); v++)
943  if (!param.subsampling || v%2==0) {
944  findMatch(u,v,plane_a,plane_b,plane_c,disparity_grid,grid_dims,
945  I1_desc,I2_desc,P,plane_radius,valid,right_image,D);
946  }
947  }
948  }
949  }
950 
951  }
952 
953  delete[] P;
954 }
955 
956 void Elas::leftRightConsistencyCheck(float* D1,float* D2) {
957 
958  // get disparity image dimensions
959  int32_t D_width = width;
960  int32_t D_height = height;
961  if (param.subsampling) {
962  D_width = width/2;
963  D_height = height/2;
964  }
965 
966  // make a copy of both images
967  float* D1_copy = (float*)malloc(D_width*D_height*sizeof(float));
968  float* D2_copy = (float*)malloc(D_width*D_height*sizeof(float));
969  memcpy(D1_copy,D1,D_width*D_height*sizeof(float));
970  memcpy(D2_copy,D2,D_width*D_height*sizeof(float));
971 
972  // loop variables
973  uint32_t addr,addr_warp;
974  float u_warp_1,u_warp_2,d1,d2;
975 
976  // for all image points do
977  for (int32_t u=0; u<D_width; u++) {
978  for (int32_t v=0; v<D_height; v++) {
979 
980  // compute address (u,v) and disparity value
981  addr = getAddressOffsetImage(u,v,D_width);
982  d1 = *(D1_copy+addr);
983  d2 = *(D2_copy+addr);
984  if (param.subsampling) {
985  u_warp_1 = (float)u-d1/2;
986  u_warp_2 = (float)u+d2/2;
987  } else {
988  u_warp_1 = (float)u-d1;
989  u_warp_2 = (float)u+d2;
990  }
991 
992 
993  // check if left disparity is valid
994  if (d1>=0 && u_warp_1>=0 && u_warp_1<D_width) {
995 
996  // compute warped image address
997  addr_warp = getAddressOffsetImage((int32_t)u_warp_1,v,D_width);
998 
999  // if check failed
1000  if (fabs(*(D2_copy+addr_warp)-d1)>param.lr_threshold)
1001  *(D1+addr) = -10;
1002 
1003  // set invalid
1004  } else
1005  *(D1+addr) = -10;
1006 
1007  // check if right disparity is valid
1008  if (d2>=0 && u_warp_2>=0 && u_warp_2<D_width) {
1009 
1010  // compute warped image address
1011  addr_warp = getAddressOffsetImage((int32_t)u_warp_2,v,D_width);
1012 
1013  // if check failed
1014  if (fabs(*(D1_copy+addr_warp)-d2)>param.lr_threshold)
1015  *(D2+addr) = -10;
1016 
1017  // set invalid
1018  } else
1019  *(D2+addr) = -10;
1020  }
1021  }
1022 
1023  // release memory
1024  free(D1_copy);
1025  free(D2_copy);
1026 }
1027 
1028 void Elas::removeSmallSegments (float* D) {
1029 
1030  // get disparity image dimensions
1031  int32_t D_width = width;
1032  int32_t D_height = height;
1033  int32_t D_speckle_size = param.speckle_size;
1034  if (param.subsampling) {
1035  D_width = width/2;
1036  D_height = height/2;
1037  D_speckle_size = sqrt((float)param.speckle_size)*2;
1038  }
1039 
1040  // allocate memory on heap for dynamic programming arrays
1041  int32_t *D_done = (int32_t*)calloc(D_width*D_height,sizeof(int32_t));
1042  int32_t *seg_list_u = (int32_t*)calloc(D_width*D_height,sizeof(int32_t));
1043  int32_t *seg_list_v = (int32_t*)calloc(D_width*D_height,sizeof(int32_t));
1044  int32_t seg_list_count;
1045  int32_t seg_list_curr;
1046  int32_t u_neighbor[4];
1047  int32_t v_neighbor[4];
1048  int32_t u_seg_curr;
1049  int32_t v_seg_curr;
1050 
1051  // declare loop variables
1052  int32_t addr_start, addr_curr, addr_neighbor;
1053 
1054  // for all pixels do
1055  for (int32_t u=0; u<D_width; u++) {
1056  for (int32_t v=0; v<D_height; v++) {
1057 
1058  // get address of first pixel in this segment
1059  addr_start = getAddressOffsetImage(u,v,D_width);
1060 
1061  // if this pixel has not already been processed
1062  if (*(D_done+addr_start)==0) {
1063 
1064  // init segment list (add first element
1065  // and set it to be the next element to check)
1066  *(seg_list_u+0) = u;
1067  *(seg_list_v+0) = v;
1068  seg_list_count = 1;
1069  seg_list_curr = 0;
1070 
1071  // add neighboring segments as long as there
1072  // are none-processed pixels in the seg_list;
1073  // none-processed means: seg_list_curr<seg_list_count
1074  while (seg_list_curr<seg_list_count) {
1075 
1076  // get current position from seg_list
1077  u_seg_curr = *(seg_list_u+seg_list_curr);
1078  v_seg_curr = *(seg_list_v+seg_list_curr);
1079 
1080  // get address of current pixel in this segment
1081  addr_curr = getAddressOffsetImage(u_seg_curr,v_seg_curr,D_width);
1082 
1083  // fill list with neighbor positions
1084  u_neighbor[0] = u_seg_curr-1; v_neighbor[0] = v_seg_curr;
1085  u_neighbor[1] = u_seg_curr+1; v_neighbor[1] = v_seg_curr;
1086  u_neighbor[2] = u_seg_curr; v_neighbor[2] = v_seg_curr-1;
1087  u_neighbor[3] = u_seg_curr; v_neighbor[3] = v_seg_curr+1;
1088 
1089  // for all neighbors do
1090  for (int32_t i=0; i<4; i++) {
1091 
1092  // check if neighbor is inside image
1093  if (u_neighbor[i]>=0 && v_neighbor[i]>=0 && u_neighbor[i]<D_width && v_neighbor[i]<D_height) {
1094 
1095  // get neighbor pixel address
1096  addr_neighbor = getAddressOffsetImage(u_neighbor[i],v_neighbor[i],D_width);
1097 
1098  // check if neighbor has not been added yet and if it is valid
1099  if (*(D_done+addr_neighbor)==0 && *(D+addr_neighbor)>=0) {
1100 
1101  // is the neighbor similar to the current pixel
1102  // (=belonging to the current segment)
1103  if (fabs(*(D+addr_curr)-*(D+addr_neighbor))<=param.speckle_sim_threshold) {
1104 
1105  // add neighbor coordinates to segment list
1106  *(seg_list_u+seg_list_count) = u_neighbor[i];
1107  *(seg_list_v+seg_list_count) = v_neighbor[i];
1108  seg_list_count++;
1109 
1110  // set neighbor pixel in I_done to "done"
1111  // (otherwise a pixel may be added 2 times to the list, as
1112  // neighbor of one pixel and as neighbor of another pixel)
1113  *(D_done+addr_neighbor) = 1;
1114  }
1115  }
1116 
1117  }
1118  }
1119 
1120  // set current pixel in seg_list to "done"
1121  seg_list_curr++;
1122 
1123  // set current pixel in I_done to "done"
1124  *(D_done+addr_curr) = 1;
1125 
1126  } // end: while (seg_list_curr<seg_list_count)
1127 
1128  // if segment NOT large enough => invalidate pixels
1129  if (seg_list_count<D_speckle_size) {
1130 
1131  // for all pixels in current segment invalidate pixels
1132  for (int32_t i=0; i<seg_list_count; i++) {
1133  addr_curr = getAddressOffsetImage(*(seg_list_u+i),*(seg_list_v+i),D_width);
1134  *(D+addr_curr) = -10;
1135  }
1136  }
1137  } // end: if (*(I_done+addr_start)==0)
1138 
1139  }
1140  }
1141 
1142  // free memory
1143  free(D_done);
1144  free(seg_list_u);
1145  free(seg_list_v);
1146 }
1147 
1148 void Elas::gapInterpolation(float* D) {
1149 
1150  // get disparity image dimensions
1151  int32_t D_width = width;
1152  int32_t D_height = height;
1153  int32_t D_ipol_gap_width = param.ipol_gap_width;
1154  if (param.subsampling) {
1155  D_width = width/2;
1156  D_height = height/2;
1157  D_ipol_gap_width = param.ipol_gap_width/2+1;
1158  }
1159 
1160  // discontinuity threshold
1161  float discon_threshold = 3.0;
1162 
1163  // declare loop variables
1164  int32_t count,addr,v_first,v_last,u_first,u_last;
1165  float d1,d2,d_ipol;
1166 
1167  // 1. Row-wise:
1168  // for each row do
1169  for (int32_t v=0; v<D_height; v++) {
1170 
1171  // init counter
1172  count = 0;
1173 
1174  // for each element of the row do
1175  for (int32_t u=0; u<D_width; u++) {
1176 
1177  // get address of this location
1178  addr = getAddressOffsetImage(u,v,D_width);
1179 
1180  // if disparity valid
1181  if (*(D+addr)>=0) {
1182 
1183  // check if speckle is small enough
1184  if (count>=1 && count<=D_ipol_gap_width) {
1185 
1186  // first and last value for interpolation
1187  u_first = u-count;
1188  u_last = u-1;
1189 
1190  // if value in range
1191  if (u_first>0 && u_last<D_width-1) {
1192 
1193  // compute mean disparity
1194  d1 = *(D+getAddressOffsetImage(u_first-1,v,D_width));
1195  d2 = *(D+getAddressOffsetImage(u_last+1,v,D_width));
1196  if (fabs(d1-d2)<discon_threshold) d_ipol = (d1+d2)/2;
1197  else d_ipol = min(d1,d2);
1198 
1199  // set all values to d_ipol
1200  for (int32_t u_curr=u_first; u_curr<=u_last; u_curr++)
1201  *(D+getAddressOffsetImage(u_curr,v,D_width)) = d_ipol;
1202  }
1203 
1204  }
1205 
1206  // reset counter
1207  count = 0;
1208 
1209  // otherwise increment counter
1210  } else {
1211  count++;
1212  }
1213  }
1214 
1215  // if full size disp map requested
1216  if (param.add_corners) {
1217 
1218  // extrapolate to the left
1219  for (int32_t u=0; u<D_width; u++) {
1220 
1221  // get address of this location
1222  addr = getAddressOffsetImage(u,v,D_width);
1223 
1224  // if disparity valid
1225  if (*(D+addr)>=0) {
1226  for (int32_t u2=max(u-D_ipol_gap_width,0); u2<u; u2++)
1227  *(D+getAddressOffsetImage(u2,v,D_width)) = *(D+addr);
1228  break;
1229  }
1230  }
1231 
1232  // extrapolate to the right
1233  for (int32_t u=D_width-1; u>=0; u--) {
1234 
1235  // get address of this location
1236  addr = getAddressOffsetImage(u,v,D_width);
1237 
1238  // if disparity valid
1239  if (*(D+addr)>=0) {
1240  for (int32_t u2=u; u2<=min(u+D_ipol_gap_width,D_width-1); u2++)
1241  *(D+getAddressOffsetImage(u2,v,D_width)) = *(D+addr);
1242  break;
1243  }
1244  }
1245  }
1246  }
1247 
1248  // 2. Column-wise:
1249  // for each column do
1250  for (int32_t u=0; u<D_width; u++) {
1251 
1252  // init counter
1253  count = 0;
1254 
1255  // for each element of the column do
1256  for (int32_t v=0; v<D_height; v++) {
1257 
1258  // get address of this location
1259  addr = getAddressOffsetImage(u,v,D_width);
1260 
1261  // if disparity valid
1262  if (*(D+addr)>=0) {
1263 
1264  // check if gap is small enough
1265  if (count>=1 && count<=D_ipol_gap_width) {
1266 
1267  // first and last value for interpolation
1268  v_first = v-count;
1269  v_last = v-1;
1270 
1271  // if value in range
1272  if (v_first>0 && v_last<D_height-1) {
1273 
1274  // compute mean disparity
1275  d1 = *(D+getAddressOffsetImage(u,v_first-1,D_width));
1276  d2 = *(D+getAddressOffsetImage(u,v_last+1,D_width));
1277  if (fabs(d1-d2)<discon_threshold) d_ipol = (d1+d2)/2;
1278  else d_ipol = min(d1,d2);
1279 
1280  // set all values to d_ipol
1281  for (int32_t v_curr=v_first; v_curr<=v_last; v_curr++)
1282  *(D+getAddressOffsetImage(u,v_curr,D_width)) = d_ipol;
1283  }
1284 
1285  }
1286 
1287  // reset counter
1288  count = 0;
1289 
1290  // otherwise increment counter
1291  } else {
1292  count++;
1293  }
1294  }
1295  }
1296 }
1297 
1298 // implements approximation to bilateral filtering
1299 void Elas::adaptiveMean (float* D) {
1300 
1301  // get disparity image dimensions
1302  int32_t D_width = width;
1303  int32_t D_height = height;
1304  if (param.subsampling) {
1305  D_width = width/2;
1306  D_height = height/2;
1307  }
1308 
1309  // allocate temporary memory
1310  float* D_copy = (float*)malloc(D_width*D_height*sizeof(float));
1311  float* D_tmp = (float*)malloc(D_width*D_height*sizeof(float));
1312  memcpy(D_copy,D,D_width*D_height*sizeof(float));
1313 
1314  // zero input disparity maps to -10 (this makes the bilateral
1315  // weights of all valid disparities to 0 in this region)
1316  for (int32_t i=0; i<D_width*D_height; i++) {
1317  if (*(D+i)<0) {
1318  *(D_copy+i) = -10;
1319  *(D_tmp+i) = -10;
1320  }
1321  }
1322 
1323  __m128 xconst0 = _mm_set1_ps(0);
1324  __m128 xconst4 = _mm_set1_ps(4);
1325  __m128 xval,xweight1,xweight2,xfactor1,xfactor2;
1326 
1327  float *val = (float *)_mm_malloc(8*sizeof(float),16);
1328  float *weight = (float*)_mm_malloc(4*sizeof(float),16);
1329  float *factor = (float*)_mm_malloc(4*sizeof(float),16);
1330 
1331  // set absolute mask
1332  __m128 xabsmask = _mm_set1_ps(0x7FFFFFFF);
1333 
1334  // when doing subsampling: 4 pixel bilateral filter width
1335  if (param.subsampling) {
1336 
1337  // horizontal filter
1338  for (int32_t v=3; v<D_height-3; v++) {
1339 
1340  // init
1341  for (int32_t u=0; u<3; u++)
1342  val[u] = *(D_copy+v*D_width+u);
1343 
1344  // loop
1345  for (int32_t u=3; u<D_width; u++) {
1346 
1347  // set
1348  float val_curr = *(D_copy+v*D_width+(u-1));
1349  val[u%4] = *(D_copy+v*D_width+u);
1350 
1351  xval = _mm_load_ps(val);
1352  xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1353  xweight1 = _mm_and_ps(xweight1,xabsmask);
1354  xweight1 = _mm_sub_ps(xconst4,xweight1);
1355  xweight1 = _mm_max_ps(xconst0,xweight1);
1356  xfactor1 = _mm_mul_ps(xval,xweight1);
1357 
1358  _mm_store_ps(weight,xweight1);
1359  _mm_store_ps(factor,xfactor1);
1360 
1361  float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1362  float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1363 
1364  if (weight_sum>0) {
1365  float d = factor_sum/weight_sum;
1366  if (d>=0) *(D_tmp+v*D_width+(u-1)) = d;
1367  }
1368  }
1369  }
1370 
1371  // vertical filter
1372  for (int32_t u=3; u<D_width-3; u++) {
1373 
1374  // init
1375  for (int32_t v=0; v<3; v++)
1376  val[v] = *(D_tmp+v*D_width+u);
1377 
1378  // loop
1379  for (int32_t v=3; v<D_height; v++) {
1380 
1381  // set
1382  float val_curr = *(D_tmp+(v-1)*D_width+u);
1383  val[v%4] = *(D_tmp+v*D_width+u);
1384 
1385  xval = _mm_load_ps(val);
1386  xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1387  xweight1 = _mm_and_ps(xweight1,xabsmask);
1388  xweight1 = _mm_sub_ps(xconst4,xweight1);
1389  xweight1 = _mm_max_ps(xconst0,xweight1);
1390  xfactor1 = _mm_mul_ps(xval,xweight1);
1391 
1392  _mm_store_ps(weight,xweight1);
1393  _mm_store_ps(factor,xfactor1);
1394 
1395  float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1396  float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1397 
1398  if (weight_sum>0) {
1399  float d = factor_sum/weight_sum;
1400  if (d>=0) *(D+(v-1)*D_width+u) = d;
1401  }
1402  }
1403  }
1404 
1405  // full resolution: 8 pixel bilateral filter width
1406  } else {
1407 
1408 
1409  // horizontal filter
1410  for (int32_t v=3; v<D_height-3; v++) {
1411 
1412  // init
1413  for (int32_t u=0; u<7; u++)
1414  val[u] = *(D_copy+v*D_width+u);
1415 
1416  // loop
1417  for (int32_t u=7; u<D_width; u++) {
1418 
1419  // set
1420  float val_curr = *(D_copy+v*D_width+(u-3));
1421  val[u%8] = *(D_copy+v*D_width+u);
1422 
1423  xval = _mm_load_ps(val);
1424  xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1425  xweight1 = _mm_and_ps(xweight1,xabsmask);
1426  xweight1 = _mm_sub_ps(xconst4,xweight1);
1427  xweight1 = _mm_max_ps(xconst0,xweight1);
1428  xfactor1 = _mm_mul_ps(xval,xweight1);
1429 
1430  xval = _mm_load_ps(val+4);
1431  xweight2 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1432  xweight2 = _mm_and_ps(xweight2,xabsmask);
1433  xweight2 = _mm_sub_ps(xconst4,xweight2);
1434  xweight2 = _mm_max_ps(xconst0,xweight2);
1435  xfactor2 = _mm_mul_ps(xval,xweight2);
1436 
1437  xweight1 = _mm_add_ps(xweight1,xweight2);
1438  xfactor1 = _mm_add_ps(xfactor1,xfactor2);
1439 
1440  _mm_store_ps(weight,xweight1);
1441  _mm_store_ps(factor,xfactor1);
1442 
1443  float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1444  float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1445 
1446  if (weight_sum>0) {
1447  float d = factor_sum/weight_sum;
1448  if (d>=0) *(D_tmp+v*D_width+(u-3)) = d;
1449  }
1450  }
1451  }
1452 
1453  // vertical filter
1454  for (int32_t u=3; u<D_width-3; u++) {
1455 
1456  // init
1457  for (int32_t v=0; v<7; v++)
1458  val[v] = *(D_tmp+v*D_width+u);
1459 
1460  // loop
1461  for (int32_t v=7; v<D_height; v++) {
1462 
1463  // set
1464  float val_curr = *(D_tmp+(v-3)*D_width+u);
1465  val[v%8] = *(D_tmp+v*D_width+u);
1466 
1467  xval = _mm_load_ps(val);
1468  xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1469  xweight1 = _mm_and_ps(xweight1,xabsmask);
1470  xweight1 = _mm_sub_ps(xconst4,xweight1);
1471  xweight1 = _mm_max_ps(xconst0,xweight1);
1472  xfactor1 = _mm_mul_ps(xval,xweight1);
1473 
1474  xval = _mm_load_ps(val+4);
1475  xweight2 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1476  xweight2 = _mm_and_ps(xweight2,xabsmask);
1477  xweight2 = _mm_sub_ps(xconst4,xweight2);
1478  xweight2 = _mm_max_ps(xconst0,xweight2);
1479  xfactor2 = _mm_mul_ps(xval,xweight2);
1480 
1481  xweight1 = _mm_add_ps(xweight1,xweight2);
1482  xfactor1 = _mm_add_ps(xfactor1,xfactor2);
1483 
1484  _mm_store_ps(weight,xweight1);
1485  _mm_store_ps(factor,xfactor1);
1486 
1487  float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1488  float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1489 
1490  if (weight_sum>0) {
1491  float d = factor_sum/weight_sum;
1492  if (d>=0) *(D+(v-3)*D_width+u) = d;
1493  }
1494  }
1495  }
1496  }
1497 
1498  // free memory
1499  _mm_free(val);
1500  _mm_free(weight);
1501  _mm_free(factor);
1502  free(D_copy);
1503  free(D_tmp);
1504 }
1505 
1506 void Elas::median (float* D) {
1507 
1508  // get disparity image dimensions
1509  int32_t D_width = width;
1510  int32_t D_height = height;
1511  if (param.subsampling) {
1512  D_width = width/2;
1513  D_height = height/2;
1514  }
1515 
1516  // temporary memory
1517  float *D_temp = (float*)calloc(D_width*D_height,sizeof(float));
1518 
1519  int32_t window_size = 3;
1520 
1521  float *vals = new float[window_size*2+1];
1522  int32_t i,j;
1523  float temp;
1524 
1525  // first step: horizontal median filter
1526  for (int32_t u=window_size; u<D_width-window_size; u++) {
1527  for (int32_t v=window_size; v<D_height-window_size; v++) {
1528  if (*(D+getAddressOffsetImage(u,v,D_width))>=0) {
1529  j = 0;
1530  for (int32_t u2=u-window_size; u2<=u+window_size; u2++) {
1531  temp = *(D+getAddressOffsetImage(u2,v,D_width));
1532  i = j-1;
1533  while (i>=0 && *(vals+i)>temp) {
1534  *(vals+i+1) = *(vals+i);
1535  i--;
1536  }
1537  *(vals+i+1) = temp;
1538  j++;
1539  }
1540  *(D_temp+getAddressOffsetImage(u,v,D_width)) = *(vals+window_size);
1541  } else {
1542  *(D_temp+getAddressOffsetImage(u,v,D_width)) = *(D+getAddressOffsetImage(u,v,D_width));
1543  }
1544 
1545  }
1546  }
1547 
1548  // second step: vertical median filter
1549  for (int32_t u=window_size; u<D_width-window_size; u++) {
1550  for (int32_t v=window_size; v<D_height-window_size; v++) {
1551  if (*(D+getAddressOffsetImage(u,v,D_width))>=0) {
1552  j = 0;
1553  for (int32_t v2=v-window_size; v2<=v+window_size; v2++) {
1554  temp = *(D_temp+getAddressOffsetImage(u,v2,D_width));
1555  i = j-1;
1556  while (i>=0 && *(vals+i)>temp) {
1557  *(vals+i+1) = *(vals+i);
1558  i--;
1559  }
1560  *(vals+i+1) = temp;
1561  j++;
1562  }
1563  *(D+getAddressOffsetImage(u,v,D_width)) = *(vals+window_size);
1564  } else {
1565  *(D+getAddressOffsetImage(u,v,D_width)) = *(D+getAddressOffsetImage(u,v,D_width));
1566  }
1567  }
1568  }
1569 
1570  free(D_temp);
1571  free(vals);
1572 }