stereo-vision
All Data Structures Namespaces Functions Modules Pages
elas.cpp
1 /*
2 Copyright 2011. All rights reserved.
3 Institute of Measurement and Control Systems
4 Karlsruhe Institute of Technology, Germany
5 
6 This file is part of libelas.
7 Authors: Andreas Geiger
8 
9 libelas is free software; you can redistribute it and/or modify it under the
10 terms of the GNU General Public License as published by the Free Software
11 Foundation; either version 3 of the License, or any later version.
12 
13 libelas is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
15 PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License along with
18 libelas; if not, write to the Free Software Foundation, Inc., 51 Franklin
19 Street, Fifth Floor, Boston, MA 02110-1301, USA
20  */
21 
22 #include "elas.h"
23 
24 #include <algorithm>
25 #include <math.h>
26 #include "descriptor.h"
27 #include "triangle.h"
28 #include "matrix.h"
29 
30 using namespace std;
31 
32 bool Elas::process (uint8_t* I1_,uint8_t* I2_,float* D1,float* D2,const int32_t* dims){
33 
34  // get width, height and bytes per line
35  width = dims[0];
36  height = dims[1];
37  bpl = width + 15-(width-1)%16;
38 
39  // copy images to byte aligned memory
40  I1 = (uint8_t*)_mm_malloc(bpl*height*sizeof(uint8_t),16);
41  I2 = (uint8_t*)_mm_malloc(bpl*height*sizeof(uint8_t),16);
42  memset (I1,0,bpl*height*sizeof(uint8_t));
43  memset (I2,0,bpl*height*sizeof(uint8_t));
44  if (bpl==dims[2]) {
45  memcpy(I1,I1_,bpl*height*sizeof(uint8_t));
46  memcpy(I2,I2_,bpl*height*sizeof(uint8_t));
47  } else {
48  for (int32_t v=0; v<height; v++) {
49  memcpy(I1+v*bpl,I1_+v*dims[2],width*sizeof(uint8_t));
50  memcpy(I2+v*bpl,I2_+v*dims[2],width*sizeof(uint8_t));
51  }
52  }
53 
54 #ifdef PROFILE
55  timer.start("Descriptor");
56 #endif
57  Descriptor desc1(I1,width,height,bpl,param.subsampling);
58  Descriptor desc2(I2,width,height,bpl,param.subsampling);
59 
60 #ifdef PROFILE
61  timer.start("Support Matches");
62 #endif
63  vector<support_pt> p_support = computeSupportMatches(desc1.I_desc,desc2.I_desc);
64 
65  bool success;
66 
67  if (p_support.size()>=3)
68  {
69 
70  // // if not enough support points for triangulation
71  // if (p_support.size()<3) {
72  // cout << "ERROR: Need at least 3 support points!" << endl;
73  // _mm_free(I1);
74  // _mm_free(I2);
75  // return;
76  // }
77 
78 #ifdef PROFILE
79  timer.start("Delaunay Triangulation");
80 #endif
81  vector<triangle> tri_1 = computeDelaunayTriangulation(p_support,0);
82  vector<triangle> tri_2 = computeDelaunayTriangulation(p_support,1);
83 
84 #ifdef PROFILE
85  timer.start("Disparity Planes");
86 #endif
87  computeDisparityPlanes(p_support,tri_1,0);
88  computeDisparityPlanes(p_support,tri_2,1);
89 
90 #ifdef PROFILE
91  timer.start("Grid");
92 #endif
93 
94  // allocate memory for disparity grid
95  int32_t grid_width = (int32_t)ceil((float)width/(float)param.grid_size);
96  int32_t grid_height = (int32_t)ceil((float)height/(float)param.grid_size);
97  int32_t grid_dims[3] = {param.disp_max+2,grid_width,grid_height};
98  int32_t* disparity_grid_1 = (int32_t*)calloc((param.disp_max+2)*grid_height*grid_width,sizeof(int32_t));
99  int32_t* disparity_grid_2 = (int32_t*)calloc((param.disp_max+2)*grid_height*grid_width,sizeof(int32_t));
100 
101  createGrid(p_support,disparity_grid_1,grid_dims,0);
102  createGrid(p_support,disparity_grid_2,grid_dims,1);
103 
104 #ifdef PROFILE
105  timer.start("Matching");
106 #endif
107  computeDisparity(p_support,tri_1,disparity_grid_1,grid_dims,desc1.I_desc,desc2.I_desc,0,D1);
108  computeDisparity(p_support,tri_2,disparity_grid_2,grid_dims,desc1.I_desc,desc2.I_desc,1,D2);
109 
110 #ifdef PROFILE
111  timer.start("L/R Consistency Check");
112 #endif
113  leftRightConsistencyCheck(D1,D2);
114 
115 #ifdef PROFILE
116  timer.start("Remove Small Segments");
117 #endif
118  removeSmallSegments(D1);
119  if (!param.postprocess_only_left)
120  removeSmallSegments(D2);
121 
122 #ifdef PROFILE
123  timer.start("Gap Interpolation");
124 #endif
125  gapInterpolation(D1);
126  if (!param.postprocess_only_left)
127  gapInterpolation(D2);
128 
129  if (param.filter_adaptive_mean) {
130 #ifdef PROFILE
131  timer.start("Adaptive Mean");
132 #endif
133  adaptiveMean(D1);
134  if (!param.postprocess_only_left)
135  adaptiveMean(D2);
136  }
137 
138  if (param.filter_median) {
139 #ifdef PROFILE
140  timer.start("Median");
141 #endif
142  median(D1);
143  if (!param.postprocess_only_left)
144  median(D2);
145  }
146 
147 #ifdef PROFILE
148  timer.plot();
149 #endif
150 
151  free(disparity_grid_1);
152  free(disparity_grid_2);
153 
154  success = true;
155 
156  } else
157  {
158  success = false;
159  }
160 
161  // release memory
162  _mm_free(I1);
163  _mm_free(I2);
164 
165  return success;
166 }
167 
168 void Elas::removeInconsistentSupportPoints (int16_t* D_can,int32_t D_can_width,int32_t D_can_height) {
169 
170  // for all valid support points do
171  for (int32_t u_can=0; u_can<D_can_width; u_can++) {
172  for (int32_t v_can=0; v_can<D_can_height; v_can++) {
173  int16_t d_can = *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width));
174  if (d_can>=0) {
175 
176  // compute number of other points supporting the current point
177  int32_t support = 0;
178  for (int32_t u_can_2=u_can-param.incon_window_size; u_can_2<=u_can+param.incon_window_size; u_can_2++) {
179  for (int32_t v_can_2=v_can-param.incon_window_size; v_can_2<=v_can+param.incon_window_size; v_can_2++) {
180  if (u_can_2>=0 && v_can_2>=0 && u_can_2<D_can_width && v_can_2<D_can_height) {
181  int16_t d_can_2 = *(D_can+getAddressOffsetImage(u_can_2,v_can_2,D_can_width));
182  if (d_can_2>=0 && abs(d_can-d_can_2)<=param.incon_threshold)
183  support++;
184  }
185  }
186  }
187 
188  // invalidate support point if number of supporting points is too low
189  if (support<param.incon_min_support)
190  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
191  }
192  }
193  }
194 }
195 
196 void Elas::removeRedundantSupportPoints(int16_t* D_can,int32_t D_can_width,int32_t D_can_height,
197  int32_t redun_max_dist, int32_t redun_threshold, bool vertical) {
198 
199  // parameters
200  int32_t redun_dir_u[2] = {0,0};
201  int32_t redun_dir_v[2] = {0,0};
202  if (vertical) {
203  redun_dir_v[0] = -1;
204  redun_dir_v[1] = +1;
205  } else {
206  redun_dir_u[0] = -1;
207  redun_dir_u[1] = +1;
208  }
209 
210  // for all valid support points do
211  for (int32_t u_can=0; u_can<D_can_width; u_can++) {
212  for (int32_t v_can=0; v_can<D_can_height; v_can++) {
213  int16_t d_can = *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width));
214  if (d_can>=0) {
215 
216  // check all directions for redundancy
217  bool redundant = true;
218  for (int32_t i=0; i<2; i++) {
219 
220  // search for support
221  int32_t u_can_2 = u_can;
222  int32_t v_can_2 = v_can;
223  int16_t d_can_2;
224  bool support = false;
225  for (int32_t j=0; j<redun_max_dist; j++) {
226  u_can_2 += redun_dir_u[i];
227  v_can_2 += redun_dir_v[i];
228  if (u_can_2<0 || v_can_2<0 || u_can_2>=D_can_width || v_can_2>=D_can_height)
229  break;
230  d_can_2 = *(D_can+getAddressOffsetImage(u_can_2,v_can_2,D_can_width));
231  if (d_can_2>=0 && abs(d_can-d_can_2)<=redun_threshold) {
232  support = true;
233  break;
234  }
235  }
236 
237  // if we have no support => point is not redundant
238  if (!support) {
239  redundant = false;
240  break;
241  }
242  }
243 
244  // invalidate support point if it is redundant
245  if (redundant)
246  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
247  }
248  }
249  }
250 }
251 
252 void Elas::addCornerSupportPoints(vector<support_pt> &p_support) {
253 
254  // list of border points
255  vector<support_pt> p_border;
256  p_border.push_back(support_pt(0,0,0));
257  p_border.push_back(support_pt(0,height-1,0));
258  p_border.push_back(support_pt(width-1,0,0));
259  p_border.push_back(support_pt(width-1,height-1,0));
260 
261  // find closest d
262  for (int32_t i=0; i<p_border.size(); i++) {
263  int32_t best_dist = 10000000;
264  for (int32_t j=0; j<p_support.size(); j++) {
265  int32_t du = p_border[i].u-p_support[j].u;
266  int32_t dv = p_border[i].v-p_support[j].v;
267  int32_t curr_dist = du*du+dv*dv;
268  if (curr_dist<best_dist) {
269  best_dist = curr_dist;
270  p_border[i].d = p_support[j].d;
271  }
272  }
273  }
274 
275  // for right image
276  p_border.push_back(support_pt(p_border[2].u+p_border[2].d,p_border[2].v,p_border[2].d));
277  p_border.push_back(support_pt(p_border[3].u+p_border[3].d,p_border[3].v,p_border[3].d));
278 
279  // add border points to support points
280  for (int32_t i=0; i<p_border.size(); i++)
281  p_support.push_back(p_border[i]);
282 }
283 
284 inline int16_t Elas::computeMatchingDisparity (const int32_t &u,const int32_t &v,uint8_t* I1_desc,uint8_t* I2_desc,const bool &right_image) {
285 
286  const int32_t u_step = 2;
287  const int32_t v_step = 2;
288  const int32_t window_size = 3;
289 
290  int32_t desc_offset_1 = -16*u_step-16*width*v_step;
291  int32_t desc_offset_2 = +16*u_step-16*width*v_step;
292  int32_t desc_offset_3 = -16*u_step+16*width*v_step;
293  int32_t desc_offset_4 = +16*u_step+16*width*v_step;
294 
295  __m128i xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
296 
297  // check if we are inside the image region
298  if (u>=window_size+u_step && u<=width-window_size-1-u_step && v>=window_size+v_step && v<=height-window_size-1-v_step) {
299 
300  // compute desc and start addresses
301  int32_t line_offset = 16*width*v;
302  uint8_t *I1_line_addr,*I2_line_addr;
303  if (!right_image) {
304  I1_line_addr = I1_desc+line_offset;
305  I2_line_addr = I2_desc+line_offset;
306  } else {
307  I1_line_addr = I2_desc+line_offset;
308  I2_line_addr = I1_desc+line_offset;
309  }
310 
311  // compute I1 block start addresses
312  uint8_t* I1_block_addr = I1_line_addr+16*u;
313  uint8_t* I2_block_addr;
314 
315  // we require at least some texture
316  int32_t sum = 0;
317  for (int32_t i=0; i<16; i++)
318  sum += abs((int32_t)(*(I1_block_addr+i))-128);
319  if (sum<param.support_texture)
320  return -1;
321 
322  // load first blocks to xmm registers
323  xmm1 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_1));
324  xmm2 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_2));
325  xmm3 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_3));
326  xmm4 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_4));
327 
328  // declare match energy for each disparity
329  int32_t u_warp;
330 
331  // best match
332  int16_t min_1_E = 32767;
333  int16_t min_1_d = -1;
334  int16_t min_2_E = 32767;
335  int16_t min_2_d = -1;
336 
337  // get valid disparity range
338  int32_t disp_min_valid = max(param.disp_min,0);
339  int32_t disp_max_valid = param.disp_max;
340  if (!right_image) disp_max_valid = min(param.disp_max,u-window_size-u_step);
341  else disp_max_valid = min(param.disp_max,width-u-window_size-u_step);
342 
343  // assume, that we can compute at least 10 disparities for this pixel
344  if (disp_max_valid-disp_min_valid<10)
345  return -1;
346 
347  // for all disparities do
348  for (int16_t d=disp_min_valid; d<=disp_max_valid; d++) {
349 
350  // warp u coordinate
351  if (!right_image) u_warp = u-d;
352  else u_warp = u+d;
353 
354  // compute I2 block start addresses
355  I2_block_addr = I2_line_addr+16*u_warp;
356 
357  // compute match energy at this disparity
358  xmm6 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_1));
359  xmm6 = _mm_sad_epu8(xmm1,xmm6);
360  xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_2));
361  xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm2,xmm5),xmm6);
362  xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_3));
363  xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm3,xmm5),xmm6);
364  xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_4));
365  xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm4,xmm5),xmm6);
366  sum = _mm_extract_epi16(xmm6,0)+_mm_extract_epi16(xmm6,4);
367 
368  // best + second best match
369  if (sum<min_1_E) {
370  min_2_E = min_1_E;
371  min_2_d = min_1_d;
372  min_1_E = sum;
373  min_1_d = d;
374  } else if (sum<min_2_E) {
375  min_2_E = sum;
376  min_2_d = d;
377  }
378  }
379 
380  // check if best and second best match are available and if matching ratio is sufficient
381  if (min_1_d>=0 && min_2_d>=0 && (float)min_1_E<param.support_threshold*(float)min_2_E)
382  return min_1_d;
383  else
384  return -1;
385 
386  } else
387  return -1;
388 }
389 
390 vector<Elas::support_pt> Elas::computeSupportMatches (uint8_t* I1_desc,uint8_t* I2_desc) {
391 
392  // be sure that at half resolution we only need data
393  // from every second line!
394  int32_t D_candidate_stepsize = param.candidate_stepsize;
395  if (param.subsampling)
396  D_candidate_stepsize += D_candidate_stepsize%2;
397 
398  // create matrix for saving disparity candidates
399  int32_t D_can_width = 0;
400  int32_t D_can_height = 0;
401  for (int32_t u=0; u<width; u+=D_candidate_stepsize) D_can_width++;
402  for (int32_t v=0; v<height; v+=D_candidate_stepsize) D_can_height++;
403  int16_t* D_can = (int16_t*)calloc(D_can_width*D_can_height,sizeof(int16_t));
404 
405  // loop variables
406  int32_t u,v;
407  int16_t d,d2;
408 
409  // for all point candidates in image 1 do
410  for (int32_t u_can=1; u_can<D_can_width; u_can++) {
411  u = u_can*D_candidate_stepsize;
412  for (int32_t v_can=1; v_can<D_can_height; v_can++) {
413  v = v_can*D_candidate_stepsize;
414 
415  // initialize disparity candidate to invalid
416  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
417 
418  // find forwards
419  d = computeMatchingDisparity(u,v,I1_desc,I2_desc,false);
420  if (d>=0) {
421 
422  // find backwards
423  d2 = computeMatchingDisparity(u-d,v,I1_desc,I2_desc,true);
424  if (d2>=0 && abs(d-d2)<=param.lr_threshold)
425  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = d;
426  }
427  }
428  }
429 
430  // remove inconsistent support points
431  removeInconsistentSupportPoints(D_can,D_can_width,D_can_height);
432 
433  // remove support points on straight lines, since they are redundant
434  // this reduces the number of triangles a little bit and hence speeds up
435  // the triangulation process
436  removeRedundantSupportPoints(D_can,D_can_width,D_can_height,5,1,true);
437  removeRedundantSupportPoints(D_can,D_can_width,D_can_height,5,1,false);
438 
439  // move support points from image representation into a vector representation
440  vector<support_pt> p_support;
441  for (int32_t u_can=1; u_can<D_can_width; u_can++)
442  for (int32_t v_can=1; v_can<D_can_height; v_can++)
443  if (*(D_can+getAddressOffsetImage(u_can,v_can,D_can_width))>=0)
444  p_support.push_back(support_pt(u_can*D_candidate_stepsize,
445  v_can*D_candidate_stepsize,
446  *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width))));
447 
448  // if flag is set, add support points in image corners
449  // with the same disparity as the nearest neighbor support point
450  if (param.add_corners)
451  addCornerSupportPoints(p_support);
452 
453  // free memory
454  free(D_can);
455 
456  // return support point vector
457  return p_support;
458 }
459 
460 vector<Elas::triangle> Elas::computeDelaunayTriangulation (vector<support_pt> p_support,int32_t right_image) {
461 
462  // input/output structure for triangulation
463  struct triangulateio in, out;
464  int32_t k;
465 
466  // inputs
467  in.numberofpoints = p_support.size();
468  in.pointlist = (float*)malloc(in.numberofpoints*2*sizeof(float));
469  k=0;
470  if (!right_image) {
471  for (int32_t i=0; i<p_support.size(); i++) {
472  in.pointlist[k++] = p_support[i].u;
473  in.pointlist[k++] = p_support[i].v;
474  }
475  } else {
476  for (int32_t i=0; i<p_support.size(); i++) {
477  in.pointlist[k++] = p_support[i].u-p_support[i].d;
478  in.pointlist[k++] = p_support[i].v;
479  }
480  }
481  in.numberofpointattributes = 0;
482  in.pointattributelist = NULL;
483  in.pointmarkerlist = NULL;
484  in.numberofsegments = 0;
485  in.numberofholes = 0;
486  in.numberofregions = 0;
487  in.regionlist = NULL;
488 
489  // outputs
490  out.pointlist = NULL;
491  out.pointattributelist = NULL;
492  out.pointmarkerlist = NULL;
493  out.trianglelist = NULL;
494  out.triangleattributelist = NULL;
495  out.neighborlist = NULL;
496  out.segmentlist = NULL;
497  out.segmentmarkerlist = NULL;
498  out.edgelist = NULL;
499  out.edgemarkerlist = NULL;
500 
501  // do triangulation (z=zero-based, n=neighbors, Q=quiet, B=no boundary markers)
502  char parameters[] = "zQB";
503  triangulate(parameters, &in, &out, NULL);
504 
505  // put resulting triangles into vector tri
506  vector<triangle> tri;
507  k=0;
508  for (int32_t i=0; i<out.numberoftriangles; i++) {
509  tri.push_back(triangle(out.trianglelist[k],out.trianglelist[k+1],out.trianglelist[k+2]));
510  k+=3;
511  }
512 
513  // free memory used for triangulation
514  free(in.pointlist);
515  free(out.pointlist);
516  free(out.trianglelist);
517 
518  // return triangles
519  return tri;
520 }
521 
522 void Elas::computeDisparityPlanes (vector<support_pt> p_support,vector<triangle> &tri,int32_t right_image) {
523 
524  // init matrices
525  Matrix A(3,3);
526  Matrix b(3,1);
527 
528  // for all triangles do
529  for (int32_t i=0; i<tri.size(); i++) {
530 
531  // get triangle corner indices
532  int32_t c1 = tri[i].c1;
533  int32_t c2 = tri[i].c2;
534  int32_t c3 = tri[i].c3;
535 
536  // compute matrix A for linear system of left triangle
537  A.val[0][0] = p_support[c1].u;
538  A.val[1][0] = p_support[c2].u;
539  A.val[2][0] = p_support[c3].u;
540  A.val[0][1] = p_support[c1].v; A.val[0][2] = 1;
541  A.val[1][1] = p_support[c2].v; A.val[1][2] = 1;
542  A.val[2][1] = p_support[c3].v; A.val[2][2] = 1;
543 
544  // compute vector b for linear system (containing the disparities)
545  b.val[0][0] = p_support[c1].d;
546  b.val[1][0] = p_support[c2].d;
547  b.val[2][0] = p_support[c3].d;
548 
549  // on success of gauss jordan elimination
550  if (b.solve(A)) {
551 
552  // grab results from b
553  tri[i].t1a = b.val[0][0];
554  tri[i].t1b = b.val[1][0];
555  tri[i].t1c = b.val[2][0];
556 
557  // otherwise: invalid
558  } else {
559  tri[i].t1a = 0;
560  tri[i].t1b = 0;
561  tri[i].t1c = 0;
562  }
563 
564  // compute matrix A for linear system of right triangle
565  A.val[0][0] = p_support[c1].u-p_support[c1].d;
566  A.val[1][0] = p_support[c2].u-p_support[c2].d;
567  A.val[2][0] = p_support[c3].u-p_support[c3].d;
568  A.val[0][1] = p_support[c1].v; A.val[0][2] = 1;
569  A.val[1][1] = p_support[c2].v; A.val[1][2] = 1;
570  A.val[2][1] = p_support[c3].v; A.val[2][2] = 1;
571 
572  // compute vector b for linear system (containing the disparities)
573  b.val[0][0] = p_support[c1].d;
574  b.val[1][0] = p_support[c2].d;
575  b.val[2][0] = p_support[c3].d;
576 
577  // on success of gauss jordan elimination
578  if (b.solve(A)) {
579 
580  // grab results from b
581  tri[i].t2a = b.val[0][0];
582  tri[i].t2b = b.val[1][0];
583  tri[i].t2c = b.val[2][0];
584 
585  // otherwise: invalid
586  } else {
587  tri[i].t2a = 0;
588  tri[i].t2b = 0;
589  tri[i].t2c = 0;
590  }
591  }
592 }
593 
594 void Elas::createGrid(vector<support_pt> p_support,int32_t* disparity_grid,int32_t* grid_dims,bool right_image) {
595 
596  // get grid dimensions
597  int32_t grid_width = grid_dims[1];
598  int32_t grid_height = grid_dims[2];
599 
600  // allocate temporary memory
601  int32_t* temp1 = (int32_t*)calloc((param.disp_max+1)*grid_height*grid_width,sizeof(int32_t));
602  int32_t* temp2 = (int32_t*)calloc((param.disp_max+1)*grid_height*grid_width,sizeof(int32_t));
603 
604  // for all support points do
605  for (int32_t i=0; i<p_support.size(); i++) {
606 
607  // compute disparity range to fill for this support point
608  int32_t x_curr = p_support[i].u;
609  int32_t y_curr = p_support[i].v;
610  int32_t d_curr = p_support[i].d;
611  int32_t d_min = max(d_curr-1,0);
612  int32_t d_max = min(d_curr+1,param.disp_max);
613 
614  // fill disparity grid helper
615  for (int32_t d=d_min; d<=d_max; d++) {
616  int32_t x;
617  if (!right_image)
618  x = floor((float)(x_curr/param.grid_size));
619  else
620  x = floor((float)(x_curr-d_curr)/(float)param.grid_size);
621  int32_t y = floor((float)y_curr/(float)param.grid_size);
622 
623  // point may potentially lay outside (corner points)
624  if (x>=0 && x<grid_width &&y>=0 && y<grid_height) {
625  int32_t addr = getAddressOffsetGrid(x,y,d,grid_width,param.disp_max+1);
626  *(temp1+addr) = 1;
627  }
628  }
629  }
630 
631  // diffusion pointers
632  const int32_t* tl = temp1 + (0*grid_width+0)*(param.disp_max+1);
633  const int32_t* tc = temp1 + (0*grid_width+1)*(param.disp_max+1);
634  const int32_t* tr = temp1 + (0*grid_width+2)*(param.disp_max+1);
635  const int32_t* cl = temp1 + (1*grid_width+0)*(param.disp_max+1);
636  const int32_t* cc = temp1 + (1*grid_width+1)*(param.disp_max+1);
637  const int32_t* cr = temp1 + (1*grid_width+2)*(param.disp_max+1);
638  const int32_t* bl = temp1 + (2*grid_width+0)*(param.disp_max+1);
639  const int32_t* bc = temp1 + (2*grid_width+1)*(param.disp_max+1);
640  const int32_t* br = temp1 + (2*grid_width+2)*(param.disp_max+1);
641 
642  int32_t* result = temp2 + (1*grid_width+1)*(param.disp_max+1);
643  int32_t* end_input = temp1 + grid_width*grid_height*(param.disp_max+1);
644 
645  // diffuse temporary grid
646  for( ; br != end_input; tl++, tc++, tr++, cl++, cc++, cr++, bl++, bc++, br++, result++ )
647  *result = *tl | *tc | *tr | *cl | *cc | *cr | *bl | *bc | *br;
648 
649  // for all grid positions create disparity grid
650  for (int32_t x=0; x<grid_width; x++) {
651  for (int32_t y=0; y<grid_height; y++) {
652 
653  // start with second value (first is reserved for count)
654  int32_t curr_ind = 1;
655 
656  // for all disparities do
657  for (int32_t d=0; d<=param.disp_max; d++) {
658 
659  // if yes => add this disparity to current cell
660  if (*(temp2+getAddressOffsetGrid(x,y,d,grid_width,param.disp_max+1))>0) {
661  *(disparity_grid+getAddressOffsetGrid(x,y,curr_ind,grid_width,param.disp_max+2))=d;
662  curr_ind++;
663  }
664  }
665 
666  // finally set number of indices
667  *(disparity_grid+getAddressOffsetGrid(x,y,0,grid_width,param.disp_max+2))=curr_ind-1;
668  }
669  }
670 
671  // release temporary memory
672  free(temp1);
673  free(temp2);
674 }
675 
676 inline void Elas::updatePosteriorMinimum(__m128i* I2_block_addr,const int32_t &d,const int32_t &w,
677  const __m128i &xmm1,__m128i &xmm2,int32_t &val,int32_t &min_val,int32_t &min_d) {
678  xmm2 = _mm_load_si128(I2_block_addr);
679  xmm2 = _mm_sad_epu8(xmm1,xmm2);
680  val = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4)+w;
681  if (val<min_val) {
682  min_val = val;
683  min_d = d;
684  }
685 }
686 
687 inline void Elas::updatePosteriorMinimum(__m128i* I2_block_addr,const int32_t &d,
688  const __m128i &xmm1,__m128i &xmm2,int32_t &val,int32_t &min_val,int32_t &min_d) {
689  xmm2 = _mm_load_si128(I2_block_addr);
690  xmm2 = _mm_sad_epu8(xmm1,xmm2);
691  val = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4);
692  if (val<min_val) {
693  min_val = val;
694  min_d = d;
695  }
696 }
697 
698 inline void Elas::findMatch(int32_t &u,int32_t &v,float &plane_a,float &plane_b,float &plane_c,
699  int32_t* disparity_grid,int32_t *grid_dims,uint8_t* I1_desc,uint8_t* I2_desc,
700  int32_t *P,int32_t &plane_radius,bool &valid,bool &right_image,float* D){
701 
702  // get image width and height
703  const int32_t disp_num = grid_dims[0]-1;
704  const int32_t window_size = 2;
705 
706  // address of disparity we want to compute
707  uint32_t d_addr;
708  if (param.subsampling) d_addr = getAddressOffsetImage(u/2,v/2,width/2);
709  else d_addr = getAddressOffsetImage(u,v,width);
710 
711  // check if u is ok
712  if (u<window_size || u>=width-window_size)
713  return;
714 
715  // compute line start address
716  int32_t line_offset = 16*width*max(min(v,height-3),2);
717  uint8_t *I1_line_addr,*I2_line_addr;
718  if (!right_image) {
719  I1_line_addr = I1_desc+line_offset;
720  I2_line_addr = I2_desc+line_offset;
721  } else {
722  I1_line_addr = I2_desc+line_offset;
723  I2_line_addr = I1_desc+line_offset;
724  }
725 
726  // compute I1 block start address
727  uint8_t* I1_block_addr = I1_line_addr+16*u;
728 
729  // does this patch have enough texture?
730  int32_t sum = 0;
731  for (int32_t i=0; i<16; i++)
732  sum += abs((int32_t)(*(I1_block_addr+i))-128);
733  if (sum<param.match_texture)
734  return;
735 
736  // compute disparity, min disparity and max disparity of plane prior
737  int32_t d_plane = (int32_t)(plane_a*(float)u+plane_b*(float)v+plane_c);
738  int32_t d_plane_min = max(d_plane-plane_radius,0);
739  int32_t d_plane_max = min(d_plane+plane_radius,disp_num-1);
740 
741  // get grid pointer
742  int32_t grid_x = (int32_t)floor((float)u/(float)param.grid_size);
743  int32_t grid_y = (int32_t)floor((float)v/(float)param.grid_size);
744  uint32_t grid_addr = getAddressOffsetGrid(grid_x,grid_y,0,grid_dims[1],grid_dims[0]);
745  int32_t num_grid = *(disparity_grid+grid_addr);
746  int32_t* d_grid = disparity_grid+grid_addr+1;
747 
748  // loop variables
749  int32_t d_curr, u_warp, val;
750  int32_t min_val = 10000;
751  int32_t min_d = -1;
752  __m128i xmm1 = _mm_load_si128((__m128i*)I1_block_addr);
753  __m128i xmm2;
754 
755  // left image
756  if (!right_image) {
757  for (int32_t i=0; i<num_grid; i++) {
758  d_curr = d_grid[i];
759  if (d_curr<d_plane_min || d_curr>d_plane_max) {
760  u_warp = u-d_curr;
761  if (u_warp<window_size || u_warp>=width-window_size)
762  continue;
763  updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,xmm1,xmm2,val,min_val,min_d);
764  }
765  }
766  for (d_curr=d_plane_min; d_curr<=d_plane_max; d_curr++) {
767  u_warp = u-d_curr;
768  if (u_warp<window_size || u_warp>=width-window_size)
769  continue;
770  updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,valid?*(P+abs(d_curr-d_plane)):0,xmm1,xmm2,val,min_val,min_d);
771  }
772 
773  // right image
774  } else {
775  for (int32_t i=0; i<num_grid; i++) {
776  d_curr = d_grid[i];
777  if (d_curr<d_plane_min || d_curr>d_plane_max) {
778  u_warp = u+d_curr;
779  if (u_warp<window_size || u_warp>=width-window_size)
780  continue;
781  updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,xmm1,xmm2,val,min_val,min_d);
782  }
783  }
784  for (d_curr=d_plane_min; d_curr<=d_plane_max; d_curr++) {
785  u_warp = u+d_curr;
786  if (u_warp<window_size || u_warp>=width-window_size)
787  continue;
788  updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,valid?*(P+abs(d_curr-d_plane)):0,xmm1,xmm2,val,min_val,min_d);
789  }
790  }
791 
792  // set disparity value
793  if (min_d>=0) *(D+d_addr) = min_d; // MAP value (min neg-Log probability)
794  else *(D+d_addr) = -1; // invalid disparity
795 }
796 
797 // TODO: %2 => more elegantly
798 void Elas::computeDisparity(vector<support_pt> p_support,vector<triangle> tri,int32_t* disparity_grid,int32_t *grid_dims,
799  uint8_t* I1_desc,uint8_t* I2_desc,bool right_image,float* D) {
800 
801  // number of disparities
802  const int32_t disp_num = grid_dims[0]-1;
803 
804  // descriptor window_size
805  int32_t window_size = 2;
806 
807  // init disparity image to -10
808  if (param.subsampling) {
809  for (int32_t i=0; i<(width/2)*(height/2); i++)
810  *(D+i) = -10;
811  } else {
812  for (int32_t i=0; i<width*height; i++)
813  *(D+i) = -10;
814  }
815 
816  // pre-compute prior
817  float two_sigma_squared = 2*param.sigma*param.sigma;
818  int32_t* P = new int32_t[disp_num];
819  for (int32_t delta_d=0; delta_d<disp_num; delta_d++)
820  P[delta_d] = (int32_t)((-log(param.gamma+exp(-delta_d*delta_d/two_sigma_squared))+log(param.gamma))/param.beta);
821  int32_t plane_radius = (int32_t)max((float)ceil(param.sigma*param.sradius),(float)2.0);
822 
823  // loop variables
824  int32_t c1, c2, c3;
825  float plane_a,plane_b,plane_c,plane_d;
826 
827  // for all triangles do
828  for (uint32_t i=0; i<tri.size(); i++) {
829 
830  // get plane parameters
831  uint32_t p_i = i*3;
832  if (!right_image) {
833  plane_a = tri[i].t1a;
834  plane_b = tri[i].t1b;
835  plane_c = tri[i].t1c;
836  plane_d = tri[i].t2a;
837  } else {
838  plane_a = tri[i].t2a;
839  plane_b = tri[i].t2b;
840  plane_c = tri[i].t2c;
841  plane_d = tri[i].t1a;
842  }
843 
844  // triangle corners
845  c1 = tri[i].c1;
846  c2 = tri[i].c2;
847  c3 = tri[i].c3;
848 
849  // sort triangle corners wrt. u (ascending)
850  float tri_u[3];
851  if (!right_image) {
852  tri_u[0] = p_support[c1].u;
853  tri_u[1] = p_support[c2].u;
854  tri_u[2] = p_support[c3].u;
855  } else {
856  tri_u[0] = p_support[c1].u-p_support[c1].d;
857  tri_u[1] = p_support[c2].u-p_support[c2].d;
858  tri_u[2] = p_support[c3].u-p_support[c3].d;
859  }
860  float tri_v[3] = {p_support[c1].v,p_support[c2].v,p_support[c3].v};
861 
862  for (uint32_t j=0; j<3; j++) {
863  for (uint32_t k=0; k<j; k++) {
864  if (tri_u[k]>tri_u[j]) {
865  float tri_u_temp = tri_u[j]; tri_u[j] = tri_u[k]; tri_u[k] = tri_u_temp;
866  float tri_v_temp = tri_v[j]; tri_v[j] = tri_v[k]; tri_v[k] = tri_v_temp;
867  }
868  }
869  }
870 
871  // rename corners
872  float A_u = tri_u[0]; float A_v = tri_v[0];
873  float B_u = tri_u[1]; float B_v = tri_v[1];
874  float C_u = tri_u[2]; float C_v = tri_v[2];
875 
876  // compute straight lines connecting triangle corners
877  float AB_a = 0; float AC_a = 0; float BC_a = 0;
878  if ((int32_t)(A_u)!=(int32_t)(B_u)) AB_a = (A_v-B_v)/(A_u-B_u);
879  if ((int32_t)(A_u)!=(int32_t)(C_u)) AC_a = (A_v-C_v)/(A_u-C_u);
880  if ((int32_t)(B_u)!=(int32_t)(C_u)) BC_a = (B_v-C_v)/(B_u-C_u);
881  float AB_b = A_v-AB_a*A_u;
882  float AC_b = A_v-AC_a*A_u;
883  float BC_b = B_v-BC_a*B_u;
884 
885  // a plane is only valid if itself and its projection
886  // into the other image is not too much slanted
887  bool valid = fabs(plane_a)<0.7 && fabs(plane_d)<0.7;
888 
889  // first part (triangle corner A->B)
890  if ((int32_t)(A_u)!=(int32_t)(B_u)) {
891  for (int32_t u=max((int32_t)A_u,0); u<min((int32_t)B_u,width); u++){
892  if (!param.subsampling || u%2==0) {
893  int32_t v_1 = (uint32_t)(AC_a*(float)u+AC_b);
894  int32_t v_2 = (uint32_t)(AB_a*(float)u+AB_b);
895  for (int32_t v=min(v_1,v_2); v<max(v_1,v_2); v++)
896  if (!param.subsampling || v%2==0) {
897  findMatch(u,v,plane_a,plane_b,plane_c,disparity_grid,grid_dims,
898  I1_desc,I2_desc,P,plane_radius,valid,right_image,D);
899  }
900  }
901  }
902  }
903 
904  // second part (triangle corner B->C)
905  if ((int32_t)(B_u)!=(int32_t)(C_u)) {
906  for (int32_t u=max((int32_t)B_u,0); u<min((int32_t)C_u,width); u++){
907  if (!param.subsampling || u%2==0) {
908  int32_t v_1 = (uint32_t)(AC_a*(float)u+AC_b);
909  int32_t v_2 = (uint32_t)(BC_a*(float)u+BC_b);
910  for (int32_t v=min(v_1,v_2); v<max(v_1,v_2); v++)
911  if (!param.subsampling || v%2==0) {
912  findMatch(u,v,plane_a,plane_b,plane_c,disparity_grid,grid_dims,
913  I1_desc,I2_desc,P,plane_radius,valid,right_image,D);
914  }
915  }
916  }
917  }
918 
919  }
920 
921  delete[] P;
922 }
923 
924 void Elas::leftRightConsistencyCheck(float* D1,float* D2) {
925 
926  // get disparity image dimensions
927  int32_t D_width = width;
928  int32_t D_height = height;
929  if (param.subsampling) {
930  D_width = width/2;
931  D_height = height/2;
932  }
933 
934  // make a copy of both images
935  float* D1_copy = (float*)malloc(D_width*D_height*sizeof(float));
936  float* D2_copy = (float*)malloc(D_width*D_height*sizeof(float));
937  memcpy(D1_copy,D1,D_width*D_height*sizeof(float));
938  memcpy(D2_copy,D2,D_width*D_height*sizeof(float));
939 
940  // loop variables
941  uint32_t addr,addr_warp;
942  float u_warp_1,u_warp_2,d1,d2;
943 
944  // for all image points do
945  for (int32_t u=0; u<D_width; u++) {
946  for (int32_t v=0; v<D_height; v++) {
947 
948  // compute address (u,v) and disparity value
949  addr = getAddressOffsetImage(u,v,D_width);
950  d1 = *(D1_copy+addr);
951  d2 = *(D2_copy+addr);
952  if (param.subsampling) {
953  u_warp_1 = (float)u-d1/2;
954  u_warp_2 = (float)u+d2/2;
955  } else {
956  u_warp_1 = (float)u-d1;
957  u_warp_2 = (float)u+d2;
958  }
959 
960 
961  // check if left disparity is valid
962  if (d1>=0 && u_warp_1>=0 && u_warp_1<D_width) {
963 
964  // compute warped image address
965  addr_warp = getAddressOffsetImage((int32_t)u_warp_1,v,D_width);
966 
967  // if check failed
968  if (fabs(*(D2_copy+addr_warp)-d1)>param.lr_threshold)
969  *(D1+addr) = -10;
970 
971  // set invalid
972  } else
973  *(D1+addr) = -10;
974 
975  // check if right disparity is valid
976  if (d2>=0 && u_warp_2>=0 && u_warp_2<D_width) {
977 
978  // compute warped image address
979  addr_warp = getAddressOffsetImage((int32_t)u_warp_2,v,D_width);
980 
981  // if check failed
982  if (fabs(*(D1_copy+addr_warp)-d2)>param.lr_threshold)
983  *(D2+addr) = -10;
984 
985  // set invalid
986  } else
987  *(D2+addr) = -10;
988  }
989  }
990 
991  // release memory
992  free(D1_copy);
993  free(D2_copy);
994 }
995 
996 void Elas::removeSmallSegments (float* D) {
997 
998  // get disparity image dimensions
999  int32_t D_width = width;
1000  int32_t D_height = height;
1001  int32_t D_speckle_size = param.speckle_size;
1002  if (param.subsampling) {
1003  D_width = width/2;
1004  D_height = height/2;
1005  D_speckle_size = sqrt((float)param.speckle_size)*2;
1006  }
1007 
1008  // allocate memory on heap for dynamic programming arrays
1009  int32_t *D_done = (int32_t*)calloc(D_width*D_height,sizeof(int32_t));
1010  int32_t *seg_list_u = (int32_t*)calloc(D_width*D_height,sizeof(int32_t));
1011  int32_t *seg_list_v = (int32_t*)calloc(D_width*D_height,sizeof(int32_t));
1012  int32_t seg_list_count;
1013  int32_t seg_list_curr;
1014  int32_t u_neighbor[4];
1015  int32_t v_neighbor[4];
1016  int32_t u_seg_curr;
1017  int32_t v_seg_curr;
1018 
1019  // declare loop variables
1020  int32_t addr_start, addr_curr, addr_neighbor;
1021 
1022  // for all pixels do
1023  for (int32_t u=0; u<D_width; u++) {
1024  for (int32_t v=0; v<D_height; v++) {
1025 
1026  // get address of first pixel in this segment
1027  addr_start = getAddressOffsetImage(u,v,D_width);
1028 
1029  // if this pixel has not already been processed
1030  if (*(D_done+addr_start)==0) {
1031 
1032  // init segment list (add first element
1033  // and set it to be the next element to check)
1034  *(seg_list_u+0) = u;
1035  *(seg_list_v+0) = v;
1036  seg_list_count = 1;
1037  seg_list_curr = 0;
1038 
1039  // add neighboring segments as long as there
1040  // are none-processed pixels in the seg_list;
1041  // none-processed means: seg_list_curr<seg_list_count
1042  while (seg_list_curr<seg_list_count) {
1043 
1044  // get current position from seg_list
1045  u_seg_curr = *(seg_list_u+seg_list_curr);
1046  v_seg_curr = *(seg_list_v+seg_list_curr);
1047 
1048  // get address of current pixel in this segment
1049  addr_curr = getAddressOffsetImage(u_seg_curr,v_seg_curr,D_width);
1050 
1051  // fill list with neighbor positions
1052  u_neighbor[0] = u_seg_curr-1; v_neighbor[0] = v_seg_curr;
1053  u_neighbor[1] = u_seg_curr+1; v_neighbor[1] = v_seg_curr;
1054  u_neighbor[2] = u_seg_curr; v_neighbor[2] = v_seg_curr-1;
1055  u_neighbor[3] = u_seg_curr; v_neighbor[3] = v_seg_curr+1;
1056 
1057  // for all neighbors do
1058  for (int32_t i=0; i<4; i++) {
1059 
1060  // check if neighbor is inside image
1061  if (u_neighbor[i]>=0 && v_neighbor[i]>=0 && u_neighbor[i]<D_width && v_neighbor[i]<D_height) {
1062 
1063  // get neighbor pixel address
1064  addr_neighbor = getAddressOffsetImage(u_neighbor[i],v_neighbor[i],D_width);
1065 
1066  // check if neighbor has not been added yet and if it is valid
1067  if (*(D_done+addr_neighbor)==0 && *(D+addr_neighbor)>=0) {
1068 
1069  // is the neighbor similar to the current pixel
1070  // (=belonging to the current segment)
1071  if (fabs(*(D+addr_curr)-*(D+addr_neighbor))<=param.speckle_sim_threshold) {
1072 
1073  // add neighbor coordinates to segment list
1074  *(seg_list_u+seg_list_count) = u_neighbor[i];
1075  *(seg_list_v+seg_list_count) = v_neighbor[i];
1076  seg_list_count++;
1077 
1078  // set neighbor pixel in I_done to "done"
1079  // (otherwise a pixel may be added 2 times to the list, as
1080  // neighbor of one pixel and as neighbor of another pixel)
1081  *(D_done+addr_neighbor) = 1;
1082  }
1083  }
1084 
1085  }
1086  }
1087 
1088  // set current pixel in seg_list to "done"
1089  seg_list_curr++;
1090 
1091  // set current pixel in I_done to "done"
1092  *(D_done+addr_curr) = 1;
1093 
1094  } // end: while (seg_list_curr<seg_list_count)
1095 
1096  // if segment NOT large enough => invalidate pixels
1097  if (seg_list_count<D_speckle_size) {
1098 
1099  // for all pixels in current segment invalidate pixels
1100  for (int32_t i=0; i<seg_list_count; i++) {
1101  addr_curr = getAddressOffsetImage(*(seg_list_u+i),*(seg_list_v+i),D_width);
1102  *(D+addr_curr) = -10;
1103  }
1104  }
1105  } // end: if (*(I_done+addr_start)==0)
1106 
1107  }
1108  }
1109 
1110  // free memory
1111  free(D_done);
1112  free(seg_list_u);
1113  free(seg_list_v);
1114 }
1115 
1116 void Elas::gapInterpolation(float* D) {
1117 
1118  // get disparity image dimensions
1119  int32_t D_width = width;
1120  int32_t D_height = height;
1121  int32_t D_ipol_gap_width = param.ipol_gap_width;
1122  if (param.subsampling) {
1123  D_width = width/2;
1124  D_height = height/2;
1125  D_ipol_gap_width = param.ipol_gap_width/2+1;
1126  }
1127 
1128  // discontinuity threshold
1129  float discon_threshold = 3.0;
1130 
1131  // declare loop variables
1132  int32_t count,addr,v_first,v_last,u_first,u_last;
1133  float d1,d2,d_ipol;
1134 
1135  // 1. Row-wise:
1136  // for each row do
1137  for (int32_t v=0; v<D_height; v++) {
1138 
1139  // init counter
1140  count = 0;
1141 
1142  // for each element of the row do
1143  for (int32_t u=0; u<D_width; u++) {
1144 
1145  // get address of this location
1146  addr = getAddressOffsetImage(u,v,D_width);
1147 
1148  // if disparity valid
1149  if (*(D+addr)>=0) {
1150 
1151  // check if speckle is small enough
1152  if (count>=1 && count<=D_ipol_gap_width) {
1153 
1154  // first and last value for interpolation
1155  u_first = u-count;
1156  u_last = u-1;
1157 
1158  // if value in range
1159  if (u_first>0 && u_last<D_width-1) {
1160 
1161  // compute mean disparity
1162  d1 = *(D+getAddressOffsetImage(u_first-1,v,D_width));
1163  d2 = *(D+getAddressOffsetImage(u_last+1,v,D_width));
1164  if (fabs(d1-d2)<discon_threshold) d_ipol = (d1+d2)/2;
1165  else d_ipol = min(d1,d2);
1166 
1167  // set all values to d_ipol
1168  for (int32_t u_curr=u_first; u_curr<=u_last; u_curr++)
1169  *(D+getAddressOffsetImage(u_curr,v,D_width)) = d_ipol;
1170  }
1171 
1172  }
1173 
1174  // reset counter
1175  count = 0;
1176 
1177  // otherwise increment counter
1178  } else {
1179  count++;
1180  }
1181  }
1182 
1183  // if full size disp map requested
1184  if (param.add_corners) {
1185 
1186  // extrapolate to the left
1187  for (int32_t u=0; u<D_width; u++) {
1188 
1189  // get address of this location
1190  addr = getAddressOffsetImage(u,v,D_width);
1191 
1192  // if disparity valid
1193  if (*(D+addr)>=0) {
1194  for (int32_t u2=max(u-D_ipol_gap_width,0); u2<u; u2++)
1195  *(D+getAddressOffsetImage(u2,v,D_width)) = *(D+addr);
1196  break;
1197  }
1198  }
1199 
1200  // extrapolate to the right
1201  for (int32_t u=D_width-1; u>=0; u--) {
1202 
1203  // get address of this location
1204  addr = getAddressOffsetImage(u,v,D_width);
1205 
1206  // if disparity valid
1207  if (*(D+addr)>=0) {
1208  for (int32_t u2=u; u2<=min(u+D_ipol_gap_width,D_width-1); u2++)
1209  *(D+getAddressOffsetImage(u2,v,D_width)) = *(D+addr);
1210  break;
1211  }
1212  }
1213  }
1214  }
1215 
1216  // 2. Column-wise:
1217  // for each column do
1218  for (int32_t u=0; u<D_width; u++) {
1219 
1220  // init counter
1221  count = 0;
1222 
1223  // for each element of the column do
1224  for (int32_t v=0; v<D_height; v++) {
1225 
1226  // get address of this location
1227  addr = getAddressOffsetImage(u,v,D_width);
1228 
1229  // if disparity valid
1230  if (*(D+addr)>=0) {
1231 
1232  // check if gap is small enough
1233  if (count>=1 && count<=D_ipol_gap_width) {
1234 
1235  // first and last value for interpolation
1236  v_first = v-count;
1237  v_last = v-1;
1238 
1239  // if value in range
1240  if (v_first>0 && v_last<D_height-1) {
1241 
1242  // compute mean disparity
1243  d1 = *(D+getAddressOffsetImage(u,v_first-1,D_width));
1244  d2 = *(D+getAddressOffsetImage(u,v_last+1,D_width));
1245  if (fabs(d1-d2)<discon_threshold) d_ipol = (d1+d2)/2;
1246  else d_ipol = min(d1,d2);
1247 
1248  // set all values to d_ipol
1249  for (int32_t v_curr=v_first; v_curr<=v_last; v_curr++)
1250  *(D+getAddressOffsetImage(u,v_curr,D_width)) = d_ipol;
1251  }
1252 
1253  }
1254 
1255  // reset counter
1256  count = 0;
1257 
1258  // otherwise increment counter
1259  } else {
1260  count++;
1261  }
1262  }
1263 
1264  // added extrapolation to top and bottom since bottom rows sometimes stay unlabeled...
1265  // DS 5/12/2014
1266 
1267  // if full size disp map requested
1268  if (param.add_corners) {
1269 
1270  // extrapolate towards top
1271  for (int32_t v=0; v<D_height; v++) {
1272 
1273  // get address of this location
1274  addr = getAddressOffsetImage(u,v,D_width);
1275 
1276  // if disparity valid
1277  if (*(D+addr)>=0) {
1278  for (int32_t v2=max(v-D_ipol_gap_width,0); v2<v; v2++)
1279  *(D+getAddressOffsetImage(u,v2,D_width)) = *(D+addr);
1280  break;
1281  }
1282  }
1283 
1284  // extrapolate towards the bottom
1285  for (int32_t v=D_height-1; v>=0; v--) {
1286 
1287  // get address of this location
1288  addr = getAddressOffsetImage(u,v,D_width);
1289 
1290  // if disparity valid
1291  if (*(D+addr)>=0) {
1292  for (int32_t v2=v; v2<=min(v+D_ipol_gap_width,D_height-1); v2++)
1293  *(D+getAddressOffsetImage(u,v2,D_width)) = *(D+addr);
1294  break;
1295  }
1296  }
1297  }
1298  }
1299 }
1300 
1301 // implements approximation to bilateral filtering
1302 void Elas::adaptiveMean (float* D) {
1303 
1304  // get disparity image dimensions
1305  int32_t D_width = width;
1306  int32_t D_height = height;
1307  if (param.subsampling) {
1308  D_width = width/2;
1309  D_height = height/2;
1310  }
1311 
1312  // allocate temporary memory
1313  float* D_copy = (float*)malloc(D_width*D_height*sizeof(float));
1314  float* D_tmp = (float*)malloc(D_width*D_height*sizeof(float));
1315  memcpy(D_copy,D,D_width*D_height*sizeof(float));
1316 
1317  // zero input disparity maps to -10 (this makes the bilateral
1318  // weights of all valid disparities to 0 in this region)
1319  for (int32_t i=0; i<D_width*D_height; i++) {
1320  if (*(D+i)<0) {
1321  *(D_copy+i) = -10;
1322  *(D_tmp+i) = -10;
1323  }
1324  }
1325 
1326  __m128 xconst0 = _mm_set1_ps(0);
1327  __m128 xconst4 = _mm_set1_ps(4);
1328  __m128 xval,xweight1,xweight2,xfactor1,xfactor2;
1329 
1330  float *val = (float *)_mm_malloc(8*sizeof(float),16);
1331  float *weight = (float*)_mm_malloc(4*sizeof(float),16);
1332  float *factor = (float*)_mm_malloc(4*sizeof(float),16);
1333 
1334  // set absolute mask
1335  __m128 xabsmask = _mm_set1_ps(0x7FFFFFFF);
1336 
1337  // when doing subsampling: 4 pixel bilateral filter width
1338  if (param.subsampling) {
1339 
1340  // horizontal filter
1341  for (int32_t v=3; v<D_height-3; v++) {
1342 
1343  // init
1344  for (int32_t u=0; u<3; u++)
1345  val[u] = *(D_copy+v*D_width+u);
1346 
1347  // loop
1348  for (int32_t u=3; u<D_width; u++) {
1349 
1350  // set
1351  float val_curr = *(D_copy+v*D_width+(u-1));
1352  val[u%4] = *(D_copy+v*D_width+u);
1353 
1354  xval = _mm_load_ps(val);
1355  xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1356  xweight1 = _mm_and_ps(xweight1,xabsmask);
1357  xweight1 = _mm_sub_ps(xconst4,xweight1);
1358  xweight1 = _mm_max_ps(xconst0,xweight1);
1359  xfactor1 = _mm_mul_ps(xval,xweight1);
1360 
1361  _mm_store_ps(weight,xweight1);
1362  _mm_store_ps(factor,xfactor1);
1363 
1364  float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1365  float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1366 
1367  if (weight_sum>0) {
1368  float d = factor_sum/weight_sum;
1369  if (d>=0) *(D_tmp+v*D_width+(u-1)) = d;
1370  }
1371  }
1372  }
1373 
1374  // vertical filter
1375  for (int32_t u=3; u<D_width-3; u++) {
1376 
1377  // init
1378  for (int32_t v=0; v<3; v++)
1379  val[v] = *(D_tmp+v*D_width+u);
1380 
1381  // loop
1382  for (int32_t v=3; v<D_height; v++) {
1383 
1384  // set
1385  float val_curr = *(D_tmp+(v-1)*D_width+u);
1386  val[v%4] = *(D_tmp+v*D_width+u);
1387 
1388  xval = _mm_load_ps(val);
1389  xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1390  xweight1 = _mm_and_ps(xweight1,xabsmask);
1391  xweight1 = _mm_sub_ps(xconst4,xweight1);
1392  xweight1 = _mm_max_ps(xconst0,xweight1);
1393  xfactor1 = _mm_mul_ps(xval,xweight1);
1394 
1395  _mm_store_ps(weight,xweight1);
1396  _mm_store_ps(factor,xfactor1);
1397 
1398  float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1399  float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1400 
1401  if (weight_sum>0) {
1402  float d = factor_sum/weight_sum;
1403  if (d>=0) *(D+(v-1)*D_width+u) = d;
1404  }
1405  }
1406  }
1407 
1408  // full resolution: 8 pixel bilateral filter width
1409  } else {
1410 
1411 
1412  // horizontal filter
1413  for (int32_t v=3; v<D_height-3; v++) {
1414 
1415  // init
1416  for (int32_t u=0; u<7; u++)
1417  val[u] = *(D_copy+v*D_width+u);
1418 
1419  // loop
1420  for (int32_t u=7; u<D_width; u++) {
1421 
1422  // set
1423  float val_curr = *(D_copy+v*D_width+(u-3));
1424  val[u%8] = *(D_copy+v*D_width+u);
1425 
1426  xval = _mm_load_ps(val);
1427  xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1428  xweight1 = _mm_and_ps(xweight1,xabsmask);
1429  xweight1 = _mm_sub_ps(xconst4,xweight1);
1430  xweight1 = _mm_max_ps(xconst0,xweight1);
1431  xfactor1 = _mm_mul_ps(xval,xweight1);
1432 
1433  xval = _mm_load_ps(val+4);
1434  xweight2 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1435  xweight2 = _mm_and_ps(xweight2,xabsmask);
1436  xweight2 = _mm_sub_ps(xconst4,xweight2);
1437  xweight2 = _mm_max_ps(xconst0,xweight2);
1438  xfactor2 = _mm_mul_ps(xval,xweight2);
1439 
1440  xweight1 = _mm_add_ps(xweight1,xweight2);
1441  xfactor1 = _mm_add_ps(xfactor1,xfactor2);
1442 
1443  _mm_store_ps(weight,xweight1);
1444  _mm_store_ps(factor,xfactor1);
1445 
1446  float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1447  float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1448 
1449  if (weight_sum>0) {
1450  float d = factor_sum/weight_sum;
1451  if (d>=0) *(D_tmp+v*D_width+(u-3)) = d;
1452  }
1453  }
1454  }
1455 
1456  // vertical filter
1457  for (int32_t u=3; u<D_width-3; u++) {
1458 
1459  // init
1460  for (int32_t v=0; v<7; v++)
1461  val[v] = *(D_tmp+v*D_width+u);
1462 
1463  // loop
1464  for (int32_t v=7; v<D_height; v++) {
1465 
1466  // set
1467  float val_curr = *(D_tmp+(v-3)*D_width+u);
1468  val[v%8] = *(D_tmp+v*D_width+u);
1469 
1470  xval = _mm_load_ps(val);
1471  xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1472  xweight1 = _mm_and_ps(xweight1,xabsmask);
1473  xweight1 = _mm_sub_ps(xconst4,xweight1);
1474  xweight1 = _mm_max_ps(xconst0,xweight1);
1475  xfactor1 = _mm_mul_ps(xval,xweight1);
1476 
1477  xval = _mm_load_ps(val+4);
1478  xweight2 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1479  xweight2 = _mm_and_ps(xweight2,xabsmask);
1480  xweight2 = _mm_sub_ps(xconst4,xweight2);
1481  xweight2 = _mm_max_ps(xconst0,xweight2);
1482  xfactor2 = _mm_mul_ps(xval,xweight2);
1483 
1484  xweight1 = _mm_add_ps(xweight1,xweight2);
1485  xfactor1 = _mm_add_ps(xfactor1,xfactor2);
1486 
1487  _mm_store_ps(weight,xweight1);
1488  _mm_store_ps(factor,xfactor1);
1489 
1490  float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1491  float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1492 
1493  if (weight_sum>0) {
1494  float d = factor_sum/weight_sum;
1495  if (d>=0) *(D+(v-3)*D_width+u) = d;
1496  }
1497  }
1498  }
1499  }
1500 
1501  // free memory
1502  _mm_free(val);
1503  _mm_free(weight);
1504  _mm_free(factor);
1505  free(D_copy);
1506  free(D_tmp);
1507 }
1508 
1509 void Elas::median (float* D) {
1510 
1511  // get disparity image dimensions
1512  int32_t D_width = width;
1513  int32_t D_height = height;
1514  if (param.subsampling) {
1515  D_width = width/2;
1516  D_height = height/2;
1517  }
1518 
1519  // temporary memory
1520  float *D_temp = (float*)calloc(D_width*D_height,sizeof(float));
1521 
1522  int32_t window_size = 3;
1523 
1524  float *vals = new float[window_size*2+1];
1525  int32_t i,j;
1526  float temp;
1527 
1528  // first step: horizontal median filter
1529  for (int32_t u=window_size; u<D_width-window_size; u++) {
1530  for (int32_t v=window_size; v<D_height-window_size; v++) {
1531  if (*(D+getAddressOffsetImage(u,v,D_width))>=0) {
1532  j = 0;
1533  for (int32_t u2=u-window_size; u2<=u+window_size; u2++) {
1534  temp = *(D+getAddressOffsetImage(u2,v,D_width));
1535  i = j-1;
1536  while (i>=0 && *(vals+i)>temp) {
1537  *(vals+i+1) = *(vals+i);
1538  i--;
1539  }
1540  *(vals+i+1) = temp;
1541  j++;
1542  }
1543  *(D_temp+getAddressOffsetImage(u,v,D_width)) = *(vals+window_size);
1544  } else {
1545  *(D_temp+getAddressOffsetImage(u,v,D_width)) = *(D+getAddressOffsetImage(u,v,D_width));
1546  }
1547 
1548  }
1549  }
1550 
1551  // second step: vertical median filter
1552  for (int32_t u=window_size; u<D_width-window_size; u++) {
1553  for (int32_t v=window_size; v<D_height-window_size; v++) {
1554  if (*(D+getAddressOffsetImage(u,v,D_width))>=0) {
1555  j = 0;
1556  for (int32_t v2=v-window_size; v2<=v+window_size; v2++) {
1557  temp = *(D_temp+getAddressOffsetImage(u,v2,D_width));
1558  i = j-1;
1559  while (i>=0 && *(vals+i)>temp) {
1560  *(vals+i+1) = *(vals+i);
1561  i--;
1562  }
1563  *(vals+i+1) = temp;
1564  j++;
1565  }
1566  *(D+getAddressOffsetImage(u,v,D_width)) = *(vals+window_size);
1567  } else {
1568  *(D+getAddressOffsetImage(u,v,D_width)) = *(D+getAddressOffsetImage(u,v,D_width));
1569  }
1570  }
1571  }
1572 
1573  free(D_temp);
1574  free(vals);
1575 }