27 #include "descriptor.h"
33 bool Elas::process (uint8_t* I1_,uint8_t* I2_,
float* D1,
float* D2,
const int32_t* dims){
38 bpl = width + 15-(width-1)%16;
41 I1 = (uint8_t*)_mm_malloc(bpl*height*
sizeof(uint8_t),16);
42 I2 = (uint8_t*)_mm_malloc(bpl*height*
sizeof(uint8_t),16);
43 memset (I1,0,bpl*height*
sizeof(uint8_t));
44 memset (I2,0,bpl*height*
sizeof(uint8_t));
46 memcpy(I1,I1_,bpl*height*
sizeof(uint8_t));
47 memcpy(I2,I2_,bpl*height*
sizeof(uint8_t));
49 for (int32_t v=0; v<height; v++) {
50 memcpy(I1+v*bpl,I1_+v*dims[2],width*
sizeof(uint8_t));
51 memcpy(I2+v*bpl,I2_+v*dims[2],width*
sizeof(uint8_t));
56 int32_t grid_width = (int32_t)ceil((
float)width/(float)param.grid_size);
57 int32_t grid_height = (int32_t)ceil((
float)height/(float)param.grid_size);
58 int32_t grid_dims[3] = {param.disp_max+2,grid_width,grid_height};
59 int32_t* disparity_grid_1 = (int32_t*)calloc((param.disp_max+2)*grid_height*grid_width,
sizeof(int32_t));
60 int32_t* disparity_grid_2 = (int32_t*)calloc((param.disp_max+2)*grid_height*grid_width,
sizeof(int32_t));
63 timer.start(
"Descriptor");
65 Descriptor desc1(I1,width,height,bpl,param.subsampling);
66 Descriptor desc2(I2,width,height,bpl,param.subsampling);
69 timer.start(
"Support Matches");
71 vector<support_pt> p_support = computeSupportMatches(desc1.I_desc,desc2.I_desc);
75 if (p_support.size()>=3)
79 timer.start(
"Parallel Region #1 = {Delaunay Triangulation, Disparity Planes, Grid}");
82 vector<triangle> tri_1, tri_2;
83 #pragma omp parallel num_threads(2)
89 tri_1 = computeDelaunayTriangulation(p_support,0);
90 computeDisparityPlanes(p_support,tri_1,0);
91 createGrid(p_support,disparity_grid_1,grid_dims,0);
96 tri_2 = computeDelaunayTriangulation(p_support,1);
97 computeDisparityPlanes(p_support,tri_2,1);
98 createGrid(p_support,disparity_grid_2,grid_dims,1);
107 timer.start(
"Matching");
113 computeDisparity(p_support,tri_1,disparity_grid_1,grid_dims,desc1.I_desc,desc2.I_desc,0,D1);
115 computeDisparity(p_support,tri_2,disparity_grid_2,grid_dims,desc1.I_desc,desc2.I_desc,1,D2);
119 timer.start(
"L/R Consistency Check");
121 leftRightConsistencyCheck(D1,D2);
124 timer.start(
"Remove Small Segments");
126 removeSmallSegments(D1);
127 if (!param.postprocess_only_left)
128 removeSmallSegments(D2);
131 timer.start(
"Gap Interpolation");
133 gapInterpolation(D1);
134 if (!param.postprocess_only_left)
135 gapInterpolation(D2);
137 if (param.filter_adaptive_mean) {
139 timer.start(
"Adaptive Mean");
142 if (!param.postprocess_only_left)
146 if (param.filter_median) {
148 timer.start(
"Median");
151 if (!param.postprocess_only_left)
166 free(disparity_grid_1);
167 free(disparity_grid_2);
174 void Elas::removeInconsistentSupportPoints (int16_t* D_can,int32_t D_can_width,int32_t D_can_height) {
178 for (int32_t u_can=0; u_can<D_can_width; u_can++) {
179 for (int32_t v_can=0; v_can<D_can_height; v_can++) {
180 int16_t d_can = *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width));
185 for (int32_t u_can_2=u_can-param.incon_window_size; u_can_2<=u_can+param.incon_window_size; u_can_2++) {
186 for (int32_t v_can_2=v_can-param.incon_window_size; v_can_2<=v_can+param.incon_window_size; v_can_2++) {
187 if (u_can_2>=0 && v_can_2>=0 && u_can_2<D_can_width && v_can_2<D_can_height) {
188 int16_t d_can_2 = *(D_can+getAddressOffsetImage(u_can_2,v_can_2,D_can_width));
189 if (d_can_2>=0 && abs(d_can-d_can_2)<=param.incon_threshold)
196 if (support<param.incon_min_support)
197 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
203 void Elas::removeRedundantSupportPoints(int16_t* D_can,int32_t D_can_width,int32_t D_can_height,
204 int32_t redun_max_dist, int32_t redun_threshold,
bool vertical) {
207 int32_t redun_dir_u[2] = {0,0};
208 int32_t redun_dir_v[2] = {0,0};
219 for (int32_t u_can=0; u_can<D_can_width; u_can++) {
220 for (int32_t v_can=0; v_can<D_can_height; v_can++) {
221 int16_t d_can = *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width));
225 bool redundant =
true;
226 for (int32_t i=0; i<2; i++) {
229 int32_t u_can_2 = u_can;
230 int32_t v_can_2 = v_can;
232 bool support =
false;
233 for (int32_t j=0; j<redun_max_dist; j++) {
234 u_can_2 += redun_dir_u[i];
235 v_can_2 += redun_dir_v[i];
236 if (u_can_2<0 || v_can_2<0 || u_can_2>=D_can_width || v_can_2>=D_can_height)
238 d_can_2 = *(D_can+getAddressOffsetImage(u_can_2,v_can_2,D_can_width));
239 if (d_can_2>=0 && abs(d_can-d_can_2)<=redun_threshold) {
254 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
260 void Elas::addCornerSupportPoints(vector<support_pt> &p_support) {
263 vector<support_pt> p_border;
264 p_border.push_back(support_pt(0,0,0));
265 p_border.push_back(support_pt(0,height-1,0));
266 p_border.push_back(support_pt(width-1,0,0));
267 p_border.push_back(support_pt(width-1,height-1,0));
270 for (int32_t i=0; i<p_border.size(); i++) {
271 int32_t best_dist = 10000000;
272 for (int32_t j=0; j<p_support.size(); j++) {
273 int32_t du = p_border[i].u-p_support[j].u;
274 int32_t dv = p_border[i].v-p_support[j].v;
275 int32_t curr_dist = du*du+dv*dv;
276 if (curr_dist<best_dist) {
277 best_dist = curr_dist;
278 p_border[i].d = p_support[j].d;
284 p_border.push_back(support_pt(p_border[2].u+p_border[2].d,p_border[2].v,p_border[2].d));
285 p_border.push_back(support_pt(p_border[3].u+p_border[3].d,p_border[3].v,p_border[3].d));
288 for (int32_t i=0; i<p_border.size(); i++)
289 p_support.push_back(p_border[i]);
292 inline int16_t Elas::computeMatchingDisparity (
const int32_t &u,
const int32_t &v,uint8_t* I1_desc,uint8_t* I2_desc,
const bool &right_image) {
294 const int32_t u_step = 2;
295 const int32_t v_step = 2;
296 const int32_t window_size = 3;
298 int32_t desc_offset_1 = -16*u_step-16*width*v_step;
299 int32_t desc_offset_2 = +16*u_step-16*width*v_step;
300 int32_t desc_offset_3 = -16*u_step+16*width*v_step;
301 int32_t desc_offset_4 = +16*u_step+16*width*v_step;
303 __m128i xmm1,xmm2,xmm3,xmm4,xmm5,xmm6;
306 if (u>=window_size+u_step && u<=width-window_size-1-u_step && v>=window_size+v_step && v<=height-window_size-1-v_step) {
309 int32_t line_offset = 16*width*v;
310 uint8_t *I1_line_addr,*I2_line_addr;
312 I1_line_addr = I1_desc+line_offset;
313 I2_line_addr = I2_desc+line_offset;
315 I1_line_addr = I2_desc+line_offset;
316 I2_line_addr = I1_desc+line_offset;
320 uint8_t* I1_block_addr = I1_line_addr+16*u;
321 uint8_t* I2_block_addr;
325 for (int32_t i=0; i<16; i++)
326 sum += abs((int32_t)(*(I1_block_addr+i))-128);
327 if (sum<param.support_texture)
331 xmm1 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_1));
332 xmm2 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_2));
333 xmm3 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_3));
334 xmm4 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_4));
340 int16_t min_1_E = 32767;
341 int16_t min_1_d = -1;
342 int16_t min_2_E = 32767;
343 int16_t min_2_d = -1;
346 int32_t disp_min_valid = max(param.disp_min,0);
347 int32_t disp_max_valid = param.disp_max;
348 if (!right_image) disp_max_valid = min(param.disp_max,u-window_size-u_step);
349 else disp_max_valid = min(param.disp_max,width-u-window_size-u_step);
352 if (disp_max_valid-disp_min_valid<10)
356 for (int16_t d=disp_min_valid; d<=disp_max_valid; d++) {
359 if (!right_image) u_warp = u-d;
363 I2_block_addr = I2_line_addr+16*u_warp;
366 xmm6 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_1));
367 xmm6 = _mm_sad_epu8(xmm1,xmm6);
368 xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_2));
369 xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm2,xmm5),xmm6);
370 xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_3));
371 xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm3,xmm5),xmm6);
372 xmm5 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_4));
373 xmm6 = _mm_add_epi16(_mm_sad_epu8(xmm4,xmm5),xmm6);
374 sum = _mm_extract_epi16(xmm6,0)+_mm_extract_epi16(xmm6,4);
380 }
else if (sum<min_2_E) {
387 if (min_1_d>=0 && min_2_d>=0 && (
float)min_1_E<param.support_threshold*(
float)min_2_E)
396 vector<Elas::support_pt> Elas::computeSupportMatches (uint8_t* I1_desc,uint8_t* I2_desc) {
400 int32_t D_candidate_stepsize = param.candidate_stepsize;
401 if (param.subsampling)
402 D_candidate_stepsize += D_candidate_stepsize%2;
405 int32_t D_can_width = 0;
406 int32_t D_can_height = 0;
407 for (int32_t u=0; u<width; u+=D_candidate_stepsize) D_can_width++;
408 for (int32_t v=0; v<height; v+=D_candidate_stepsize) D_can_height++;
409 int16_t* D_can = (int16_t*)calloc(D_can_width*D_can_height,
sizeof(int16_t));
414 int32_t u_can, v_can;
415 int32_t lr_threshold = param.lr_threshold;
416 vector<support_pt> p_support;
417 vector<support_pt> partial_p_support[2];
419 #pragma omp parallel default(none) num_threads(2) private(u_can, v_can, u, d, v, d2) shared(partial_p_support,lr_threshold, D_can, D_can_width, D_can_height, D_candidate_stepsize, I1_desc, I2_desc)
421 int tid = omp_get_thread_num();
423 for (v_can=1; v_can<D_can_height; v_can++) {
424 v = v_can*D_candidate_stepsize;
425 for (u_can=1; u_can<D_can_width; u_can++) {
426 u = u_can*D_candidate_stepsize;
429 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = -1;
432 d = computeMatchingDisparity(u,v,I1_desc,I2_desc,
false);
436 d2 = computeMatchingDisparity(u-d,v,I1_desc,I2_desc,
true);
437 if (d2>=0 && abs(d-d2)<=lr_threshold)
438 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width)) = d;
447 removeInconsistentSupportPoints(D_can,D_can_width,D_can_height);
453 removeRedundantSupportPoints(D_can,D_can_width,D_can_height,5,1,
true);
454 removeRedundantSupportPoints(D_can,D_can_width,D_can_height,5,1,
false);
463 for (int32_t v_can=1; v_can<D_can_height; v_can++)
464 for (int32_t u_can=1; u_can<D_can_width; u_can++)
465 if (*(D_can+getAddressOffsetImage(u_can,v_can,D_can_width))>=0)
466 partial_p_support[tid].push_back(support_pt(u_can*D_candidate_stepsize,
467 v_can*D_candidate_stepsize,
468 *(D_can+getAddressOffsetImage(u_can,v_can,D_can_width))));
470 p_support = partial_p_support[0];
471 p_support.insert(p_support.end(),partial_p_support[1].begin(),partial_p_support[1].end());
475 if (param.add_corners)
476 addCornerSupportPoints(p_support);
486 vector<Elas::triangle> Elas::computeDelaunayTriangulation (vector<support_pt> p_support,int32_t right_image) {
489 struct triangulateio in, out;
493 in.numberofpoints = p_support.size();
494 in.pointlist = (
float*)malloc(in.numberofpoints*2*
sizeof(
float));
497 for (int32_t i=0; i<p_support.size(); i++) {
498 in.pointlist[k++] = p_support[i].u;
499 in.pointlist[k++] = p_support[i].v;
502 for (int32_t i=0; i<p_support.size(); i++) {
503 in.pointlist[k++] = p_support[i].u-p_support[i].d;
504 in.pointlist[k++] = p_support[i].v;
507 in.numberofpointattributes = 0;
508 in.pointattributelist = NULL;
509 in.pointmarkerlist = NULL;
510 in.numberofsegments = 0;
511 in.numberofholes = 0;
512 in.numberofregions = 0;
513 in.regionlist = NULL;
516 out.pointlist = NULL;
517 out.pointattributelist = NULL;
518 out.pointmarkerlist = NULL;
519 out.trianglelist = NULL;
520 out.triangleattributelist = NULL;
521 out.neighborlist = NULL;
522 out.segmentlist = NULL;
523 out.segmentmarkerlist = NULL;
525 out.edgemarkerlist = NULL;
528 char parameters[] =
"zQB";
529 triangulate(parameters, &in, &out, NULL);
532 vector<triangle> tri;
534 for (int32_t i=0; i<out.numberoftriangles; i++) {
535 tri.push_back(triangle(out.trianglelist[k],out.trianglelist[k+1],out.trianglelist[k+2]));
542 free(out.trianglelist);
548 void Elas::computeDisparityPlanes (vector<support_pt> p_support,vector<triangle> &tri,int32_t right_image) {
555 for (int32_t i=0; i<tri.size(); i++) {
558 int32_t c1 = tri[i].c1;
559 int32_t c2 = tri[i].c2;
560 int32_t c3 = tri[i].c3;
563 A.val[0][0] = p_support[c1].u;
564 A.val[1][0] = p_support[c2].u;
565 A.val[2][0] = p_support[c3].u;
566 A.val[0][1] = p_support[c1].v; A.val[0][2] = 1;
567 A.val[1][1] = p_support[c2].v; A.val[1][2] = 1;
568 A.val[2][1] = p_support[c3].v; A.val[2][2] = 1;
571 b.val[0][0] = p_support[c1].d;
572 b.val[1][0] = p_support[c2].d;
573 b.val[2][0] = p_support[c3].d;
579 tri[i].t1a = b.val[0][0];
580 tri[i].t1b = b.val[1][0];
581 tri[i].t1c = b.val[2][0];
591 A.val[0][0] = p_support[c1].u-p_support[c1].d;
592 A.val[1][0] = p_support[c2].u-p_support[c2].d;
593 A.val[2][0] = p_support[c3].u-p_support[c3].d;
594 A.val[0][1] = p_support[c1].v; A.val[0][2] = 1;
595 A.val[1][1] = p_support[c2].v; A.val[1][2] = 1;
596 A.val[2][1] = p_support[c3].v; A.val[2][2] = 1;
599 b.val[0][0] = p_support[c1].d;
600 b.val[1][0] = p_support[c2].d;
601 b.val[2][0] = p_support[c3].d;
607 tri[i].t2a = b.val[0][0];
608 tri[i].t2b = b.val[1][0];
609 tri[i].t2c = b.val[2][0];
620 void Elas::createGrid(vector<support_pt> p_support,int32_t* disparity_grid,int32_t* grid_dims,
bool right_image) {
623 int32_t grid_width = grid_dims[1];
624 int32_t grid_height = grid_dims[2];
627 int32_t* temp1 = (int32_t*)calloc((param.disp_max+1)*grid_height*grid_width,
sizeof(int32_t));
628 int32_t* temp2 = (int32_t*)calloc((param.disp_max+1)*grid_height*grid_width,
sizeof(int32_t));
631 for (int32_t i=0; i<p_support.size(); i++) {
634 int32_t x_curr = p_support[i].u;
635 int32_t y_curr = p_support[i].v;
636 int32_t d_curr = p_support[i].d;
637 int32_t d_min = max(d_curr-1,0);
638 int32_t d_max = min(d_curr+1,param.disp_max);
641 for (int32_t d=d_min; d<=d_max; d++) {
644 x = floor((
float)(x_curr/param.grid_size));
646 x = floor((
float)(x_curr-d_curr)/(
float)param.grid_size);
647 int32_t y = floor((
float)y_curr/(
float)param.grid_size);
650 if (x>=0 && x<grid_width &&y>=0 && y<grid_height) {
651 int32_t addr = getAddressOffsetGrid(x,y,d,grid_width,param.disp_max+1);
658 const int32_t* tl = temp1 + (0*grid_width+0)*(param.disp_max+1);
659 const int32_t* tc = temp1 + (0*grid_width+1)*(param.disp_max+1);
660 const int32_t* tr = temp1 + (0*grid_width+2)*(param.disp_max+1);
661 const int32_t* cl = temp1 + (1*grid_width+0)*(param.disp_max+1);
662 const int32_t* cc = temp1 + (1*grid_width+1)*(param.disp_max+1);
663 const int32_t* cr = temp1 + (1*grid_width+2)*(param.disp_max+1);
664 const int32_t* bl = temp1 + (2*grid_width+0)*(param.disp_max+1);
665 const int32_t* bc = temp1 + (2*grid_width+1)*(param.disp_max+1);
666 const int32_t* br = temp1 + (2*grid_width+2)*(param.disp_max+1);
668 int32_t* result = temp2 + (1*grid_width+1)*(param.disp_max+1);
669 int32_t* end_input = temp1 + grid_width*grid_height*(param.disp_max+1);
672 for( ; br != end_input; tl++, tc++, tr++, cl++, cc++, cr++, bl++, bc++, br++, result++ )
673 *result = *tl | *tc | *tr | *cl | *cc | *cr | *bl | *bc | *br;
676 for (int32_t x=0; x<grid_width; x++) {
677 for (int32_t y=0; y<grid_height; y++) {
680 int32_t curr_ind = 1;
683 for (int32_t d=0; d<=param.disp_max; d++) {
686 if (*(temp2+getAddressOffsetGrid(x,y,d,grid_width,param.disp_max+1))>0) {
687 *(disparity_grid+getAddressOffsetGrid(x,y,curr_ind,grid_width,param.disp_max+2))=d;
693 *(disparity_grid+getAddressOffsetGrid(x,y,0,grid_width,param.disp_max+2))=curr_ind-1;
702 inline void Elas::updatePosteriorMinimum(__m128i* I2_block_addr,
const int32_t &d,
const int32_t &w,
703 const __m128i &xmm1,__m128i &xmm2,int32_t &val,int32_t &min_val,int32_t &min_d) {
704 xmm2 = _mm_load_si128(I2_block_addr);
705 xmm2 = _mm_sad_epu8(xmm1,xmm2);
706 val = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4)+w;
713 inline void Elas::updatePosteriorMinimum(__m128i* I2_block_addr,
const int32_t &d,
714 const __m128i &xmm1,__m128i &xmm2,int32_t &val,int32_t &min_val,int32_t &min_d) {
715 xmm2 = _mm_load_si128(I2_block_addr);
716 xmm2 = _mm_sad_epu8(xmm1,xmm2);
717 val = _mm_extract_epi16(xmm2,0)+_mm_extract_epi16(xmm2,4);
724 inline void Elas::findMatch(int32_t &u,int32_t &v,
float &plane_a,
float &plane_b,
float &plane_c,
725 int32_t* disparity_grid,int32_t *grid_dims,uint8_t* I1_desc,uint8_t* I2_desc,
726 int32_t *P,int32_t &plane_radius,
bool &valid,
bool &right_image,
float* D){
729 const int32_t disp_num = grid_dims[0]-1;
730 const int32_t window_size = 2;
734 if (param.subsampling) d_addr = getAddressOffsetImage(u/2,v/2,width/2);
735 else d_addr = getAddressOffsetImage(u,v,width);
738 if (u<window_size || u>=width-window_size)
742 int32_t line_offset = 16*width*max(min(v,height-3),2);
743 uint8_t *I1_line_addr,*I2_line_addr;
745 I1_line_addr = I1_desc+line_offset;
746 I2_line_addr = I2_desc+line_offset;
748 I1_line_addr = I2_desc+line_offset;
749 I2_line_addr = I1_desc+line_offset;
753 uint8_t* I1_block_addr = I1_line_addr+16*u;
757 for (int32_t i=0; i<16; i++)
758 sum += abs((int32_t)(*(I1_block_addr+i))-128);
759 if (sum<param.match_texture)
763 int32_t d_plane = (int32_t)(plane_a*(
float)u+plane_b*(float)v+plane_c);
764 int32_t d_plane_min = max(d_plane-plane_radius,0);
765 int32_t d_plane_max = min(d_plane+plane_radius,disp_num-1);
768 int32_t grid_x = (int32_t)floor((
float)u/(float)param.grid_size);
769 int32_t grid_y = (int32_t)floor((
float)v/(float)param.grid_size);
770 uint32_t grid_addr = getAddressOffsetGrid(grid_x,grid_y,0,grid_dims[1],grid_dims[0]);
771 int32_t num_grid = *(disparity_grid+grid_addr);
772 int32_t* d_grid = disparity_grid+grid_addr+1;
775 int32_t d_curr, u_warp, val;
776 int32_t min_val = 10000;
778 __m128i xmm1 = _mm_load_si128((__m128i*)I1_block_addr);
783 for (int32_t i=0; i<num_grid; i++) {
785 if (d_curr<d_plane_min || d_curr>d_plane_max) {
787 if (u_warp<window_size || u_warp>=width-window_size)
789 updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,xmm1,xmm2,val,min_val,min_d);
792 for (d_curr=d_plane_min; d_curr<=d_plane_max; d_curr++) {
794 if (u_warp<window_size || u_warp>=width-window_size)
796 updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,valid?*(P+abs(d_curr-d_plane)):0,xmm1,xmm2,val,min_val,min_d);
801 for (int32_t i=0; i<num_grid; i++) {
803 if (d_curr<d_plane_min || d_curr>d_plane_max) {
805 if (u_warp<window_size || u_warp>=width-window_size)
807 updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,xmm1,xmm2,val,min_val,min_d);
810 for (d_curr=d_plane_min; d_curr<=d_plane_max; d_curr++) {
812 if (u_warp<window_size || u_warp>=width-window_size)
814 updatePosteriorMinimum((__m128i*)(I2_line_addr+16*u_warp),d_curr,valid?*(P+abs(d_curr-d_plane)):0,xmm1,xmm2,val,min_val,min_d);
819 if (min_d>=0) *(D+d_addr) = min_d;
820 else *(D+d_addr) = -1;
824 void Elas::computeDisparity(vector<support_pt> p_support,vector<triangle> tri,int32_t* disparity_grid,int32_t *grid_dims,
825 uint8_t* I1_desc,uint8_t* I2_desc,
bool right_image,
float* D) {
829 int disp_num = grid_dims[0]-1;
832 int32_t window_size = 2;
835 if (param.subsampling) {
836 for (int32_t i=0; i<(width/2)*(height/2); i++)
839 for (int32_t i=0; i<width*height; i++)
844 float two_sigma_squared = 2*param.sigma*param.sigma;
845 int32_t* P =
new int32_t[disp_num];
846 for (int32_t delta_d=0; delta_d<disp_num; delta_d++)
847 P[delta_d] = (int32_t)((-log(param.gamma+exp(-delta_d*delta_d/two_sigma_squared))+log(param.gamma))/param.beta);
848 int32_t plane_radius = (int32_t)max((
float)ceil(param.sigma*param.sradius),(float)2.0);
852 float plane_a,plane_b,plane_c,plane_d;
856 #pragma omp parallel for num_threads(3) default(none)\
857 private(i, plane_a, plane_b, plane_c, plane_d, c1, c2, c3)\
858 shared(P, plane_radius, two_sigma_squared, disp_num, window_size, p_support, tri, disparity_grid, grid_dims, I1_desc, I2_desc, right_image, D)
859 for (i=0; i<tri.size(); i++) {
865 plane_a = tri[i].t1a;
866 plane_b = tri[i].t1b;
867 plane_c = tri[i].t1c;
868 plane_d = tri[i].t2a;
870 plane_a = tri[i].t2a;
871 plane_b = tri[i].t2b;
872 plane_c = tri[i].t2c;
873 plane_d = tri[i].t1a;
884 tri_u[0] = p_support[c1].u;
885 tri_u[1] = p_support[c2].u;
886 tri_u[2] = p_support[c3].u;
888 tri_u[0] = p_support[c1].u-p_support[c1].d;
889 tri_u[1] = p_support[c2].u-p_support[c2].d;
890 tri_u[2] = p_support[c3].u-p_support[c3].d;
892 float tri_v[3] = {p_support[c1].v,p_support[c2].v,p_support[c3].v};
894 for (uint32_t j=0; j<3; j++) {
895 for (uint32_t k=0; k<j; k++) {
896 if (tri_u[k]>tri_u[j]) {
897 float tri_u_temp = tri_u[j]; tri_u[j] = tri_u[k]; tri_u[k] = tri_u_temp;
898 float tri_v_temp = tri_v[j]; tri_v[j] = tri_v[k]; tri_v[k] = tri_v_temp;
904 float A_u = tri_u[0];
float A_v = tri_v[0];
905 float B_u = tri_u[1];
float B_v = tri_v[1];
906 float C_u = tri_u[2];
float C_v = tri_v[2];
909 float AB_a = 0;
float AC_a = 0;
float BC_a = 0;
910 if ((int32_t)(A_u)!=(int32_t)(B_u)) AB_a = (A_v-B_v)/(A_u-B_u);
911 if ((int32_t)(A_u)!=(int32_t)(C_u)) AC_a = (A_v-C_v)/(A_u-C_u);
912 if ((int32_t)(B_u)!=(int32_t)(C_u)) BC_a = (B_v-C_v)/(B_u-C_u);
913 float AB_b = A_v-AB_a*A_u;
914 float AC_b = A_v-AC_a*A_u;
915 float BC_b = B_v-BC_a*B_u;
919 bool valid = fabs(plane_a)<0.7 && fabs(plane_d)<0.7;
922 if ((int32_t)(A_u)!=(int32_t)(B_u)) {
923 for (int32_t u=max((int32_t)A_u,0); u<min((int32_t)B_u,width); u++){
924 if (!param.subsampling || u%2==0) {
925 int32_t v_1 = (uint32_t)(AC_a*(
float)u+AC_b);
926 int32_t v_2 = (uint32_t)(AB_a*(
float)u+AB_b);
927 for (int32_t v=min(v_1,v_2); v<max(v_1,v_2); v++)
928 if (!param.subsampling || v%2==0) {
929 findMatch(u,v,plane_a,plane_b,plane_c,disparity_grid,grid_dims,
930 I1_desc,I2_desc,P,plane_radius,valid,right_image,D);
937 if ((int32_t)(B_u)!=(int32_t)(C_u)) {
938 for (int32_t u=max((int32_t)B_u,0); u<min((int32_t)C_u,width); u++){
939 if (!param.subsampling || u%2==0) {
940 int32_t v_1 = (uint32_t)(AC_a*(
float)u+AC_b);
941 int32_t v_2 = (uint32_t)(BC_a*(
float)u+BC_b);
942 for (int32_t v=min(v_1,v_2); v<max(v_1,v_2); v++)
943 if (!param.subsampling || v%2==0) {
944 findMatch(u,v,plane_a,plane_b,plane_c,disparity_grid,grid_dims,
945 I1_desc,I2_desc,P,plane_radius,valid,right_image,D);
956 void Elas::leftRightConsistencyCheck(
float* D1,
float* D2) {
959 int32_t D_width = width;
960 int32_t D_height = height;
961 if (param.subsampling) {
967 float* D1_copy = (
float*)malloc(D_width*D_height*
sizeof(
float));
968 float* D2_copy = (
float*)malloc(D_width*D_height*
sizeof(
float));
969 memcpy(D1_copy,D1,D_width*D_height*
sizeof(
float));
970 memcpy(D2_copy,D2,D_width*D_height*
sizeof(
float));
973 uint32_t addr,addr_warp;
974 float u_warp_1,u_warp_2,d1,d2;
977 for (int32_t u=0; u<D_width; u++) {
978 for (int32_t v=0; v<D_height; v++) {
981 addr = getAddressOffsetImage(u,v,D_width);
982 d1 = *(D1_copy+addr);
983 d2 = *(D2_copy+addr);
984 if (param.subsampling) {
985 u_warp_1 = (float)u-d1/2;
986 u_warp_2 = (float)u+d2/2;
988 u_warp_1 = (float)u-d1;
989 u_warp_2 = (float)u+d2;
994 if (d1>=0 && u_warp_1>=0 && u_warp_1<D_width) {
997 addr_warp = getAddressOffsetImage((int32_t)u_warp_1,v,D_width);
1000 if (fabs(*(D2_copy+addr_warp)-d1)>param.lr_threshold)
1008 if (d2>=0 && u_warp_2>=0 && u_warp_2<D_width) {
1011 addr_warp = getAddressOffsetImage((int32_t)u_warp_2,v,D_width);
1014 if (fabs(*(D1_copy+addr_warp)-d2)>param.lr_threshold)
1028 void Elas::removeSmallSegments (
float* D) {
1031 int32_t D_width = width;
1032 int32_t D_height = height;
1033 int32_t D_speckle_size = param.speckle_size;
1034 if (param.subsampling) {
1036 D_height = height/2;
1037 D_speckle_size = sqrt((
float)param.speckle_size)*2;
1041 int32_t *D_done = (int32_t*)calloc(D_width*D_height,
sizeof(int32_t));
1042 int32_t *seg_list_u = (int32_t*)calloc(D_width*D_height,
sizeof(int32_t));
1043 int32_t *seg_list_v = (int32_t*)calloc(D_width*D_height,
sizeof(int32_t));
1044 int32_t seg_list_count;
1045 int32_t seg_list_curr;
1046 int32_t u_neighbor[4];
1047 int32_t v_neighbor[4];
1052 int32_t addr_start, addr_curr, addr_neighbor;
1055 for (int32_t u=0; u<D_width; u++) {
1056 for (int32_t v=0; v<D_height; v++) {
1059 addr_start = getAddressOffsetImage(u,v,D_width);
1062 if (*(D_done+addr_start)==0) {
1066 *(seg_list_u+0) = u;
1067 *(seg_list_v+0) = v;
1074 while (seg_list_curr<seg_list_count) {
1077 u_seg_curr = *(seg_list_u+seg_list_curr);
1078 v_seg_curr = *(seg_list_v+seg_list_curr);
1081 addr_curr = getAddressOffsetImage(u_seg_curr,v_seg_curr,D_width);
1084 u_neighbor[0] = u_seg_curr-1; v_neighbor[0] = v_seg_curr;
1085 u_neighbor[1] = u_seg_curr+1; v_neighbor[1] = v_seg_curr;
1086 u_neighbor[2] = u_seg_curr; v_neighbor[2] = v_seg_curr-1;
1087 u_neighbor[3] = u_seg_curr; v_neighbor[3] = v_seg_curr+1;
1090 for (int32_t i=0; i<4; i++) {
1093 if (u_neighbor[i]>=0 && v_neighbor[i]>=0 && u_neighbor[i]<D_width && v_neighbor[i]<D_height) {
1096 addr_neighbor = getAddressOffsetImage(u_neighbor[i],v_neighbor[i],D_width);
1099 if (*(D_done+addr_neighbor)==0 && *(D+addr_neighbor)>=0) {
1103 if (fabs(*(D+addr_curr)-*(D+addr_neighbor))<=param.speckle_sim_threshold) {
1106 *(seg_list_u+seg_list_count) = u_neighbor[i];
1107 *(seg_list_v+seg_list_count) = v_neighbor[i];
1113 *(D_done+addr_neighbor) = 1;
1124 *(D_done+addr_curr) = 1;
1129 if (seg_list_count<D_speckle_size) {
1132 for (int32_t i=0; i<seg_list_count; i++) {
1133 addr_curr = getAddressOffsetImage(*(seg_list_u+i),*(seg_list_v+i),D_width);
1134 *(D+addr_curr) = -10;
1148 void Elas::gapInterpolation(
float* D) {
1151 int32_t D_width = width;
1152 int32_t D_height = height;
1153 int32_t D_ipol_gap_width = param.ipol_gap_width;
1154 if (param.subsampling) {
1156 D_height = height/2;
1157 D_ipol_gap_width = param.ipol_gap_width/2+1;
1161 float discon_threshold = 3.0;
1164 int32_t count,addr,v_first,v_last,u_first,u_last;
1169 for (int32_t v=0; v<D_height; v++) {
1175 for (int32_t u=0; u<D_width; u++) {
1178 addr = getAddressOffsetImage(u,v,D_width);
1184 if (count>=1 && count<=D_ipol_gap_width) {
1191 if (u_first>0 && u_last<D_width-1) {
1194 d1 = *(D+getAddressOffsetImage(u_first-1,v,D_width));
1195 d2 = *(D+getAddressOffsetImage(u_last+1,v,D_width));
1196 if (fabs(d1-d2)<discon_threshold) d_ipol = (d1+d2)/2;
1197 else d_ipol = min(d1,d2);
1200 for (int32_t u_curr=u_first; u_curr<=u_last; u_curr++)
1201 *(D+getAddressOffsetImage(u_curr,v,D_width)) = d_ipol;
1216 if (param.add_corners) {
1219 for (int32_t u=0; u<D_width; u++) {
1222 addr = getAddressOffsetImage(u,v,D_width);
1226 for (int32_t u2=max(u-D_ipol_gap_width,0); u2<u; u2++)
1227 *(D+getAddressOffsetImage(u2,v,D_width)) = *(D+addr);
1233 for (int32_t u=D_width-1; u>=0; u--) {
1236 addr = getAddressOffsetImage(u,v,D_width);
1240 for (int32_t u2=u; u2<=min(u+D_ipol_gap_width,D_width-1); u2++)
1241 *(D+getAddressOffsetImage(u2,v,D_width)) = *(D+addr);
1250 for (int32_t u=0; u<D_width; u++) {
1256 for (int32_t v=0; v<D_height; v++) {
1259 addr = getAddressOffsetImage(u,v,D_width);
1265 if (count>=1 && count<=D_ipol_gap_width) {
1272 if (v_first>0 && v_last<D_height-1) {
1275 d1 = *(D+getAddressOffsetImage(u,v_first-1,D_width));
1276 d2 = *(D+getAddressOffsetImage(u,v_last+1,D_width));
1277 if (fabs(d1-d2)<discon_threshold) d_ipol = (d1+d2)/2;
1278 else d_ipol = min(d1,d2);
1281 for (int32_t v_curr=v_first; v_curr<=v_last; v_curr++)
1282 *(D+getAddressOffsetImage(u,v_curr,D_width)) = d_ipol;
1299 void Elas::adaptiveMean (
float* D) {
1302 int32_t D_width = width;
1303 int32_t D_height = height;
1304 if (param.subsampling) {
1306 D_height = height/2;
1310 float* D_copy = (
float*)malloc(D_width*D_height*
sizeof(
float));
1311 float* D_tmp = (
float*)malloc(D_width*D_height*
sizeof(
float));
1312 memcpy(D_copy,D,D_width*D_height*
sizeof(
float));
1316 for (int32_t i=0; i<D_width*D_height; i++) {
1323 __m128 xconst0 = _mm_set1_ps(0);
1324 __m128 xconst4 = _mm_set1_ps(4);
1325 __m128 xval,xweight1,xweight2,xfactor1,xfactor2;
1327 float *val = (
float *)_mm_malloc(8*
sizeof(
float),16);
1328 float *weight = (
float*)_mm_malloc(4*
sizeof(
float),16);
1329 float *factor = (
float*)_mm_malloc(4*
sizeof(
float),16);
1332 __m128 xabsmask = _mm_set1_ps(0x7FFFFFFF);
1335 if (param.subsampling) {
1338 for (int32_t v=3; v<D_height-3; v++) {
1341 for (int32_t u=0; u<3; u++)
1342 val[u] = *(D_copy+v*D_width+u);
1345 for (int32_t u=3; u<D_width; u++) {
1348 float val_curr = *(D_copy+v*D_width+(u-1));
1349 val[u%4] = *(D_copy+v*D_width+u);
1351 xval = _mm_load_ps(val);
1352 xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1353 xweight1 = _mm_and_ps(xweight1,xabsmask);
1354 xweight1 = _mm_sub_ps(xconst4,xweight1);
1355 xweight1 = _mm_max_ps(xconst0,xweight1);
1356 xfactor1 = _mm_mul_ps(xval,xweight1);
1358 _mm_store_ps(weight,xweight1);
1359 _mm_store_ps(factor,xfactor1);
1361 float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1362 float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1365 float d = factor_sum/weight_sum;
1366 if (d>=0) *(D_tmp+v*D_width+(u-1)) = d;
1372 for (int32_t u=3; u<D_width-3; u++) {
1375 for (int32_t v=0; v<3; v++)
1376 val[v] = *(D_tmp+v*D_width+u);
1379 for (int32_t v=3; v<D_height; v++) {
1382 float val_curr = *(D_tmp+(v-1)*D_width+u);
1383 val[v%4] = *(D_tmp+v*D_width+u);
1385 xval = _mm_load_ps(val);
1386 xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1387 xweight1 = _mm_and_ps(xweight1,xabsmask);
1388 xweight1 = _mm_sub_ps(xconst4,xweight1);
1389 xweight1 = _mm_max_ps(xconst0,xweight1);
1390 xfactor1 = _mm_mul_ps(xval,xweight1);
1392 _mm_store_ps(weight,xweight1);
1393 _mm_store_ps(factor,xfactor1);
1395 float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1396 float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1399 float d = factor_sum/weight_sum;
1400 if (d>=0) *(D+(v-1)*D_width+u) = d;
1410 for (int32_t v=3; v<D_height-3; v++) {
1413 for (int32_t u=0; u<7; u++)
1414 val[u] = *(D_copy+v*D_width+u);
1417 for (int32_t u=7; u<D_width; u++) {
1420 float val_curr = *(D_copy+v*D_width+(u-3));
1421 val[u%8] = *(D_copy+v*D_width+u);
1423 xval = _mm_load_ps(val);
1424 xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1425 xweight1 = _mm_and_ps(xweight1,xabsmask);
1426 xweight1 = _mm_sub_ps(xconst4,xweight1);
1427 xweight1 = _mm_max_ps(xconst0,xweight1);
1428 xfactor1 = _mm_mul_ps(xval,xweight1);
1430 xval = _mm_load_ps(val+4);
1431 xweight2 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1432 xweight2 = _mm_and_ps(xweight2,xabsmask);
1433 xweight2 = _mm_sub_ps(xconst4,xweight2);
1434 xweight2 = _mm_max_ps(xconst0,xweight2);
1435 xfactor2 = _mm_mul_ps(xval,xweight2);
1437 xweight1 = _mm_add_ps(xweight1,xweight2);
1438 xfactor1 = _mm_add_ps(xfactor1,xfactor2);
1440 _mm_store_ps(weight,xweight1);
1441 _mm_store_ps(factor,xfactor1);
1443 float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1444 float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1447 float d = factor_sum/weight_sum;
1448 if (d>=0) *(D_tmp+v*D_width+(u-3)) = d;
1454 for (int32_t u=3; u<D_width-3; u++) {
1457 for (int32_t v=0; v<7; v++)
1458 val[v] = *(D_tmp+v*D_width+u);
1461 for (int32_t v=7; v<D_height; v++) {
1464 float val_curr = *(D_tmp+(v-3)*D_width+u);
1465 val[v%8] = *(D_tmp+v*D_width+u);
1467 xval = _mm_load_ps(val);
1468 xweight1 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1469 xweight1 = _mm_and_ps(xweight1,xabsmask);
1470 xweight1 = _mm_sub_ps(xconst4,xweight1);
1471 xweight1 = _mm_max_ps(xconst0,xweight1);
1472 xfactor1 = _mm_mul_ps(xval,xweight1);
1474 xval = _mm_load_ps(val+4);
1475 xweight2 = _mm_sub_ps(xval,_mm_set1_ps(val_curr));
1476 xweight2 = _mm_and_ps(xweight2,xabsmask);
1477 xweight2 = _mm_sub_ps(xconst4,xweight2);
1478 xweight2 = _mm_max_ps(xconst0,xweight2);
1479 xfactor2 = _mm_mul_ps(xval,xweight2);
1481 xweight1 = _mm_add_ps(xweight1,xweight2);
1482 xfactor1 = _mm_add_ps(xfactor1,xfactor2);
1484 _mm_store_ps(weight,xweight1);
1485 _mm_store_ps(factor,xfactor1);
1487 float weight_sum = weight[0]+weight[1]+weight[2]+weight[3];
1488 float factor_sum = factor[0]+factor[1]+factor[2]+factor[3];
1491 float d = factor_sum/weight_sum;
1492 if (d>=0) *(D+(v-3)*D_width+u) = d;
1506 void Elas::median (
float* D) {
1509 int32_t D_width = width;
1510 int32_t D_height = height;
1511 if (param.subsampling) {
1513 D_height = height/2;
1517 float *D_temp = (
float*)calloc(D_width*D_height,
sizeof(
float));
1519 int32_t window_size = 3;
1521 float *vals =
new float[window_size*2+1];
1526 for (int32_t u=window_size; u<D_width-window_size; u++) {
1527 for (int32_t v=window_size; v<D_height-window_size; v++) {
1528 if (*(D+getAddressOffsetImage(u,v,D_width))>=0) {
1530 for (int32_t u2=u-window_size; u2<=u+window_size; u2++) {
1531 temp = *(D+getAddressOffsetImage(u2,v,D_width));
1533 while (i>=0 && *(vals+i)>temp) {
1534 *(vals+i+1) = *(vals+i);
1540 *(D_temp+getAddressOffsetImage(u,v,D_width)) = *(vals+window_size);
1542 *(D_temp+getAddressOffsetImage(u,v,D_width)) = *(D+getAddressOffsetImage(u,v,D_width));
1549 for (int32_t u=window_size; u<D_width-window_size; u++) {
1550 for (int32_t v=window_size; v<D_height-window_size; v++) {
1551 if (*(D+getAddressOffsetImage(u,v,D_width))>=0) {
1553 for (int32_t v2=v-window_size; v2<=v+window_size; v2++) {
1554 temp = *(D_temp+getAddressOffsetImage(u,v2,D_width));
1556 while (i>=0 && *(vals+i)>temp) {
1557 *(vals+i+1) = *(vals+i);
1563 *(D+getAddressOffsetImage(u,v,D_width)) = *(vals+window_size);
1565 *(D+getAddressOffsetImage(u,v,D_width)) = *(D+getAddressOffsetImage(u,v,D_width));